diff --git a/Online/Hlt2Monitoring/CMakeLists.txt b/Online/Hlt2Monitoring/CMakeLists.txt
index fbfe97b1bf928d24adfe8935b30e09a975455579..377d2faef7ebf5e584b085b6fe8615487fc76ec0 100644
--- a/Online/Hlt2Monitoring/CMakeLists.txt
+++ b/Online/Hlt2Monitoring/CMakeLists.txt
@@ -1,5 +1,5 @@
 ################################################################################
-# Package: Gaucho
+# Package: Hlt2Monitoring
 ################################################################################
 gaudi_subdir(Hlt2Monitoring v1r7)
 
@@ -8,19 +8,37 @@ gaudi_depends_on_subdirs(GaudiKernel
 						 GaudiUtils
                          Online/ZeroMQ)
 
-find_package(Boost COMPONENTS system regex serialization filesystem)
+find_package(Boost COMPONENTS system regex serialization filesystem iostreams program_options)
 find_package(ROOT COMPONENTS Core RIO Hist Thread)
 find_package(AIDA)
 
 include_directories(SYSTEM ${Boost_INCLUDE_DIRS} ${ROOT_INCLUDE_DIRS})
 
+find_path(RANGES_V3_INCLUDE_DIR NAMES range/v3/all.hpp)  # locate the range-v3 headers
+if(NOT RANGES_V3_INCLUDE_DIR)
+  message(FATAL_ERROR "required headers from range-v3 missing")  # stop configuring
+endif()
+
 gaudi_add_library(Hlt2Monitoring src/lib/*.cpp
-                  INCLUDE_DIRS Boost AIDA
+                  INCLUDE_DIRS Boost AIDA ${RANGES_V3_INCLUDE_DIR}
                   PUBLIC_HEADERS Hlt2Monitoring
                   LINK_LIBRARIES GaudiKernel Boost ROOT zmq ZMQ)
 
 gaudi_add_module(Hlt2MonitoringLib src/component/*.cpp
-                 INCLUDE_DIRS Boost AIDA
+                 INCLUDE_DIRS Boost AIDA ${RANGES_V3_INCLUDE_DIR}
                  LINK_LIBRARIES GaudiKernel Boost ROOT zmq ZMQ Hlt2Monitoring GaudiUtilsLib)
 
+# C++ utilities useful for testing or as examples; both are compiled with STANDALONE defined
+gaudi_add_executable(dump_info
+                     test/dump_info.cpp
+                     INCLUDE_DIRS Hlt2Monitoring ROOT Boost ${RANGES_V3_INCLUDE_DIR}
+                     LINK_LIBRARIES Boost ROOT zmq ZMQ Hlt2Monitoring)
+target_compile_definitions(dump_info PRIVATE STANDALONE)
+
+gaudi_add_executable(test_registrar
+                     test/test_registrar.cpp
+                     INCLUDE_DIRS Hlt2Monitoring ROOT Boost ${RANGES_V3_INCLUDE_DIR}
+                     LINK_LIBRARIES Boost ROOT zmq ZMQ Hlt2Monitoring)
+target_compile_definitions(test_registrar PRIVATE STANDALONE)
+
 gaudi_install_python_modules()
diff --git a/Online/Hlt2Monitoring/Hlt2Monitoring/Histo1DDef.h b/Online/Hlt2Monitoring/Hlt2Monitoring/Histo1DDef.h
index 40e6afb4864ce3a1281bf064908056652990f805..edd41b3dd34511219ae7a7ad5b724cf8d85403d4 100644
--- a/Online/Hlt2Monitoring/Hlt2Monitoring/Histo1DDef.h
+++ b/Online/Hlt2Monitoring/Hlt2Monitoring/Histo1DDef.h
@@ -36,6 +36,27 @@ struct Histo1DDef {
       xlabels = getLabels(axis);
       labels = !xlabels.empty();
    }
+
+   bool operator==(const Histo1DDef& other) const {
+      // Definitions are equal when title, binning mode, the binning for that
+      // mode (fixed bins or variable edges) and any bin labels all agree.
+      if (title != other.title) {
+         return false;
+      } else if (variable != other.variable) {
+         return false;
+      } else if (!variable && !same_bins(xlow, xhigh, xbins,
+                                         other.xlow, other.xhigh, other.xbins)) {
+         return false;
+      } else if (labels != other.labels) {
+         return false;
+      } else if (labels && !same_labels(xlabels, other.xlabels)) {
+         return false;
+      } else if (variable && !same_edges(xedges, other.xedges)) {
+         return false;
+      } else {
+         return true;
+      }
+   }
    
    std::string title;
 
diff --git a/Online/Hlt2Monitoring/Hlt2Monitoring/Histo2DDef.h b/Online/Hlt2Monitoring/Hlt2Monitoring/Histo2DDef.h
index 3db24f8269d484a4e0db35c00a3e70ce140e58ba..ebd04a134c17a22dc8c276d45fcde5274b0c01a4 100644
--- a/Online/Hlt2Monitoring/Hlt2Monitoring/Histo2DDef.h
+++ b/Online/Hlt2Monitoring/Hlt2Monitoring/Histo2DDef.h
@@ -21,15 +21,45 @@ struct Histo2DDef {
       : title{std::move(t)}
    {
       std::tie(xbins, xlow, xhigh, xedges) = axisDefinition(xaxis);
-      std::tie(ybins, ylow, yhigh, yedges) = axisDefinition(xaxis);
+      std::tie(ybins, ylow, yhigh, yedges) = axisDefinition(yaxis);
       xvariable = !xedges.empty();
-      xvariable = !yedges.empty();
+      yvariable = !yedges.empty();
 
       xlabels = getLabels(xaxis);
       ylabels = getLabels(yaxis);
       labels = (!xlabels.empty() || !ylabels.empty());
    }
-   
+
+   bool operator==(const Histo2DDef& other) const {
+      // Definitions are equal when the title, the binning mode and binning of
+      // both axes (fixed bins or variable edges) and any bin labels all agree.
+      if (title != other.title) {
+         return false;
+      } else if (xvariable != other.xvariable) {
+         return false;
+      } else if (yvariable != other.yvariable) {
+         return false;
+      } else if (!xvariable && !same_bins(xlow, xhigh, xbins,
+                                          other.xlow, other.xhigh, other.xbins)) {
+         return false;
+      } else if (!yvariable && !same_bins(ylow, yhigh, ybins,
+                                          other.ylow, other.yhigh, other.ybins)) {
+         return false;
+      } else if (labels != other.labels) {
+         return false;
+      } else if (labels && !same_labels(xlabels, other.xlabels)) {
+         return false;
+      } else if (labels && !same_labels(ylabels, other.ylabels)) {
+         return false;
+      } else if (xvariable && !same_edges(xedges, other.xedges)) {
+         return false;
+      } else if (yvariable && !same_edges(yedges, other.yedges)) {
+         return false;
+      } else {
+         return true;
+      }
+   }
+
    std::string title;
 
    bool xvariable = false;
@@ -73,7 +103,7 @@ void serialize(Archive& archive, Monitoring::Histo2DDef& def, const unsigned int
       archive & def.yhigh;
       archive & def.ybins;
    }
-   
+
    archive & def.labels;
    if (def.labels) {
       archive & def.xlabels;
diff --git a/Online/Hlt2Monitoring/Hlt2Monitoring/HistoUtils.h b/Online/Hlt2Monitoring/Hlt2Monitoring/HistoUtils.h
index e14423748a71f5d17a8bdb55dfd5346406db96df..6b9e68c506eb7db15f176513068e5ce3c27ba5be 100644
--- a/Online/Hlt2Monitoring/Hlt2Monitoring/HistoUtils.h
+++ b/Online/Hlt2Monitoring/Hlt2Monitoring/HistoUtils.h
@@ -2,13 +2,51 @@
 #define HISTOUTILS_H
 
 #include <vector>
+#include <string>
 #include <tuple>
 
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/sign.hpp>
+#include <boost/functional/hash.hpp>
+
+#include <range/v3/algorithm.hpp>
+#include <range/v3/view.hpp>
+
 namespace Gaudi {
    class Axis;
 }
 
 std::tuple<int, double, double, std::vector<double>> axisDefinition(const Gaudi::Axis& axis);
+
 std::vector<std::string> getLabels(const Gaudi::Axis& axis);
 
+// True when both edge vectors have the same size and every pair of corresponding
+// edges has the same sign and an epsilon_difference (multiples of epsilon) below 2.
+template<typename T, typename std::enable_if<std::is_floating_point<T>::value, T>::type* = nullptr>
+bool same_edges(const std::vector<T>& l, const std::vector<T>& r) {
+   if (l.size() != r.size()) {
+      return false;
+   } else {
+      return ranges::all_of(ranges::view::zip(l, r), [](const std::tuple<T, T>& t) {
+            return (boost::math::sign(std::get<0>(t)) == boost::math::sign(std::get<1>(t))
+                    && boost::math::epsilon_difference(std::get<0>(t), std::get<1>(t)) < 2);
+         });
+   }
+}
+
+bool same_bins(double ll, double lh, int lb,
+               double rl, double rh, int rb);
+
+bool same_labels(const std::vector<std::string>& ll,
+                 const std::vector<std::string>& rl);
+
+namespace Monitoring {
+struct Histo1DDef;
+struct Histo2DDef;
+
+// Hash functions (hash_value overloads) for Histo1DDef and Histo2DDef
+size_t hash_value(const Histo1DDef& def);
+size_t hash_value(const Histo2DDef& def);
+}
+
 #endif
diff --git a/Online/Hlt2Monitoring/Hlt2Monitoring/ITransmitterSvc.h b/Online/Hlt2Monitoring/Hlt2Monitoring/ITransmitterSvc.h
index c92b9ae0e462a102bf3501f6bff3b926c73cb40b..dec5745b1505ad7937a312b125c2211321804a9c 100644
--- a/Online/Hlt2Monitoring/Hlt2Monitoring/ITransmitterSvc.h
+++ b/Online/Hlt2Monitoring/Hlt2Monitoring/ITransmitterSvc.h
@@ -45,6 +45,8 @@ public:
    virtual void trigger(size_t id) = 0;
 
    virtual std::pair<std::string, std::string> application() const = 0;
+   
+   virtual bool ok() const = 0;
 
 };
 #endif // HLT2MONITORING_ITRANSMITTERSVC_H
diff --git a/Online/Hlt2Monitoring/Hlt2Monitoring/InfoUtils.h b/Online/Hlt2Monitoring/Hlt2Monitoring/InfoUtils.h
new file mode 100644
index 0000000000000000000000000000000000000000..9bb982ce754c5c014c2eb75f51a73ad4077dce16
--- /dev/null
+++ b/Online/Hlt2Monitoring/Hlt2Monitoring/InfoUtils.h
@@ -0,0 +1,18 @@
+#ifndef INFOUTILS_H
+#define INFOUTILS_H
+
+#include <string>
+
+#include "Types.h"
+
+// Add a histogram to a histogram container
+std::pair<Monitoring::HistoMap::const_iterator, bool>
+addHistogram(Monitoring::HistoMap& histograms,
+             const Monitoring::HistoKey& key,
+             const std::string& type,
+             MonInfo::HistoVariant variant);
+
+// Load histograms from a compressed file.
+size_t loadHistoInfo(Monitoring::HistoMap& histograms, std::string filename);
+
+#endif
diff --git a/Online/Hlt2Monitoring/Hlt2Monitoring/RunInfo.h b/Online/Hlt2Monitoring/Hlt2Monitoring/RunInfo.h
index d929d44c47933abb798aad93f37ba1bde1158ccc..228be41cd77aa2977b6f64c3458fcacea61f0321 100644
--- a/Online/Hlt2Monitoring/Hlt2Monitoring/RunInfo.h
+++ b/Online/Hlt2Monitoring/Hlt2Monitoring/RunInfo.h
@@ -1,4 +1,3 @@
-
 #ifndef HLT2MONITORING_RUNINFO_H
 #define HLT2MONITORING_RUNINFO_H 1
 
diff --git a/Online/Hlt2Monitoring/Hlt2Monitoring/SaverUtilities.h b/Online/Hlt2Monitoring/Hlt2Monitoring/SaverUtilities.h
index cc723127a2259e53d13eef4ac47ffe2894f2a31e..f193cd0e003bd4330e8d138166dd312f4af92a7c 100644
--- a/Online/Hlt2Monitoring/Hlt2Monitoring/SaverUtilities.h
+++ b/Online/Hlt2Monitoring/Hlt2Monitoring/SaverUtilities.h
@@ -13,6 +13,7 @@
 #include <boost/functional/hash.hpp>
 #include <boost/unordered_map.hpp>
 #include <boost/multi_index_container.hpp>
+#include <boost/multi_index/ordered_index.hpp>
 #include <boost/multi_index/hashed_index.hpp>
 #include <boost/multi_index/member.hpp>
 #include <boost/multi_index/mem_fun.hpp>
@@ -31,7 +32,8 @@ struct ByRun{ };
 struct ByWorker{ };
 struct ByDir{ };
 struct ByName{ };
-
+struct Sorted{ };
+   
 using HistoKey = std::pair<Monitoring::RunNumber, Monitoring::HistId>;
 
 struct HistoEntry {
@@ -55,6 +57,18 @@ struct HistoEntry {
    bool add = true;
 };
 
+struct SortHistos {
+   bool operator()(const HistoEntry& lhs, const HistoEntry& rhs) const {
+      if (lhs.run < rhs.run) {
+         return true;
+      } else if (rhs.run < lhs.run) {
+         return false;
+      } else {
+         return lhs.name() < rhs.name();
+      }
+   }
+};
+   
 // Multi index container to hold the items.
 using SaverHistos = boost::multi_index_container<
    HistoEntry,
@@ -67,6 +81,11 @@ using SaverHistos = boost::multi_index_container<
          boost::multi_index::tag<ByName>,
          boost::multi_index::const_mem_fun<HistoEntry, std::string, &HistoEntry::name>
          >,
+      boost::multi_index::ordered_unique<
+         boost::multi_index::tag<Sorted>,
+         boost::multi_index::identity<HistoEntry>,
+         SortHistos
+         >,
       boost::multi_index::hashed_non_unique<
          boost::multi_index::tag<ByDir>,
          boost::multi_index::member<HistoEntry, std::string, &HistoEntry::dir>
diff --git a/Online/Hlt2Monitoring/Hlt2Monitoring/Types.h b/Online/Hlt2Monitoring/Hlt2Monitoring/Types.h
index 4f382b389f8f21ae818170894fb337c1cce93922..a1e85aa72f29a94fd93ed5e69673d1e83bed2d34 100644
--- a/Online/Hlt2Monitoring/Hlt2Monitoring/Types.h
+++ b/Online/Hlt2Monitoring/Hlt2Monitoring/Types.h
@@ -8,30 +8,148 @@
 #include <unordered_set>
 
 #include <boost/variant.hpp>
+#include <boost/functional/hash.hpp>
+#include <boost/multi_index_container.hpp>
+#include <boost/multi_index/hashed_index.hpp>
+#include <boost/multi_index/ordered_index.hpp>
+#include <boost/multi_index/member.hpp>
+#include <boost/multi_index/mem_fun.hpp>
+
+#include <boost/serialization/shared_ptr.hpp>
+#include <boost/serialization/serialization.hpp>
+#include <boost/serialization/set.hpp>
+#include <boost/serialization/map.hpp>
+#include <boost/serialization/string.hpp>
+#include <boost/serialization/vector.hpp>
+#include <boost/serialization/shared_ptr.hpp>
+#include <boost/serialization/variant.hpp>
+
+#include <boost/multi_index_container.hpp>
+#include <boost/multi_index/hashed_index.hpp>
+#include <boost/multi_index/ordered_index.hpp>
+#include <boost/multi_index/member.hpp>
+#include <boost/multi_index/mem_fun.hpp>
+
+#include <boost/serialization/shared_ptr.hpp>
+#include <boost/serialization/serialization.hpp>
+#include <boost/serialization/set.hpp>
+#include <boost/serialization/map.hpp>
+#include <boost/serialization/string.hpp>
+#include <boost/serialization/vector.hpp>
+#include <boost/serialization/shared_ptr.hpp>
+#include <boost/serialization/variant.hpp>
 
 #include "Common.h"
 #include "Histo1DDef.h"
 #include "Histo2DDef.h"
 #include "CounterDef.h"
+#include "RunInfo.h"
+
+namespace Monitoring {
+   using HistoKey = std::pair<RunNumber, HistId>;
+}
+
+namespace MonInfo {
+
+   struct ByKey{};
+   struct ByContent{};
+
+   // Variant to wrap the different types we want to store as histo info.
+   using HistoVariant = boost::variant<std::string,
+                                       Monitoring::Histo1DDef,
+                                       Monitoring::Histo2DDef>;
+
+   // Visitor to print different types in an info message
+   class Printer : public boost::static_visitor<std::string> {
+   public:
+
+      std::string operator()(const std::string& eval) {
+         return eval;
+      }
+
+      std::string operator()(const Monitoring::Histo1DDef& def) {
+         return def.title;
+      }
+
+      std::string operator()(const Monitoring::Histo2DDef& def) {
+         return def.title;
+      }
+
+      std::string operator()(const Monitoring::CounterDef& def) {
+         return def.name + " " + def.description;
+      }
+   };
+
+   // Node for the multi-index container. Use shared_ptr to save memory.
+   struct HistoEntry {
+      HistoEntry() = default;
+
+      HistoEntry(Monitoring::HistoKey k, std::string t,
+                 size_t h, std::shared_ptr<HistoVariant> c)
+       : key{std::move(k)},
+         type{std::move(t)},
+         hash{h},
+         cnt{std::move(c)} {}
+
+      Monitoring::HistoKey key = {0, 0};
+      std::string type;
+      size_t hash = 0;
+      std::shared_ptr<HistoVariant> cnt;
+
+      const HistoVariant& content() const {
+         return *cnt;
+      }
+   };
+}
+
+namespace boost {
+namespace serialization {
+
+// Serialize HistoEntry
+template <typename Archive>
+auto serialize(Archive& archive, MonInfo::HistoEntry& entry,
+               const unsigned int) -> void {
+  archive& entry.key;
+  archive& entry.type;
+  archive& entry.hash;
+  archive& entry.cnt;
+}
+}
+}
 
 namespace Monitoring {
 // Types used when communicating and storing information about objects.
-using HistoVariant = boost::variant<std::string, Histo1DDef, Histo2DDef>;
 
-using HistoKey = std::pair<RunNumber, HistId>;
+// Multi index container to hold the items.
+using HistoMap = boost::multi_index_container<
+   MonInfo::HistoEntry,
+   boost::multi_index::indexed_by<
+      boost::multi_index::hashed_unique<
+         boost::multi_index::tag<MonInfo::ByKey>,
+         boost::multi_index::member<MonInfo::HistoEntry, HistoKey, &MonInfo::HistoEntry::key>,
+         boost::hash<HistoKey>
+         >,
+      boost::multi_index::ordered_non_unique<
+         boost::multi_index::tag<MonInfo::ByContent>,
+         boost::multi_index::member<MonInfo::HistoEntry, size_t, &MonInfo::HistoEntry::hash>
+         >
+      >
+   >;
+
 using HistoKeys = std::unordered_set<HistoKey, boost::hash<HistoKey>>;
-using HistoPub = std::vector<std::tuple<RunNumber, HistId, std::string, std::string>>;
-using HistoMap = boost::unordered_map<HistoKey, std::pair<std::string, HistoVariant>>;
+using HistoPub = std::vector<std::tuple<std::string, std::string, std::vector<HistoKey>>>;
+
+using KeyHash = boost::hash<Monitoring::HistoKey>;
 
 using CounterKey = HistId;
 using CounterKeys = std::unordered_set<CounterKey>;
 using CounterPub = std::vector<std::tuple<HistId, CounterDef>>;
-using CounterMap = boost::unordered_map<CounterKey, CounterDef>;
+using CounterMap = std::unordered_map<CounterKey, CounterDef, boost::hash<CounterKey>>;
 
 // Run Info
 using RunInfoKey = std::pair<RunNumber, std::string>;
 using RunInfoKeys = std::unordered_set<RunInfoKey, boost::hash<RunInfoKey>>;
-using RunInfoMap = boost::unordered_map<RunInfoKey, std::pair<bool, RunInfo>>;
+using RunInfoMap = std::unordered_map<RunInfoKey, std::pair<bool, RunInfo>, boost::hash<RunInfoKey>>;
 using RunInfoPub = std::vector<std::pair<std::string, RunInfo>>;
 }
 #endif
diff --git a/Online/Hlt2Monitoring/Hlt2Monitoring/Utilities.h b/Online/Hlt2Monitoring/Hlt2Monitoring/Utilities.h
index 5198101be9913df990904e285df29090df71f69e..a830e2e8bdee4a7a815f067070e818b515faf379 100644
--- a/Online/Hlt2Monitoring/Hlt2Monitoring/Utilities.h
+++ b/Online/Hlt2Monitoring/Hlt2Monitoring/Utilities.h
@@ -18,21 +18,15 @@
 #include <boost/regex.hpp>
 
 // ZeroMQ and local
-#ifdef STANDALONE
-#include "zmq.hpp"
-#include "IZeroMQSvc.h"
-#define endmsg endl
-#define MsgStream std::ostream
-#else
 #include <zmq/zmq.hpp>
 #include <ZeroMQ/IZeroMQSvc.h>
-#include <GaudiKernel/MsgStream.h>
-#include <GaudiKernel/ParsersFactory.h>
-#endif
 
 #include "Common.h"
 
 #ifndef STANDALONE
+#include <GaudiKernel/MsgStream.h>
+#include <GaudiKernel/ParsersFactory.h>
+
 namespace Gaudi {
    namespace Parsers {
       // Parser grammar and parse function for CorrectMap
@@ -45,10 +39,6 @@ namespace Gaudi {
 }
 #endif
 
-#ifdef STANDALONE
-IZeroMQSvc* zmqSvc();
-#endif
-
 namespace Monitoring {
 
 unsigned int sourceID(boost::regex regex, std::string host);
diff --git a/Online/Hlt2Monitoring/python/Hlt2Monitoring/Hlt2Adder.py b/Online/Hlt2Monitoring/python/Hlt2Monitoring/Hlt2Adder.py
index 22624008ef97b06378e52bd11c925fe0643eb2b1..fad50d3eefd0375e5270fbc5288ef04f32dcbfaa 100644
--- a/Online/Hlt2Monitoring/python/Hlt2Monitoring/Hlt2Adder.py
+++ b/Online/Hlt2Monitoring/python/Hlt2Monitoring/Hlt2Adder.py
@@ -2,11 +2,12 @@ import os
 import socket
 import re
 
-from Utilities import importOnline, configOnline
+from Utilities import importOnline, configOnline, connectionDirectory
 
 __ports = {'Transmitter': 31348,
            'Adder': {'in': 31347, 'out': 31351},
            'InfoSvc': {'in': 31349, 'out': 31352}}
+__host_regex = r"^hlt(0[12]|(?P<subfarm>[a-f]{1}[0-9]{2})(?P<node>[0-9]{2})?).*"
 
 
 def extraConf(svcs, extra):
@@ -51,7 +52,7 @@ def configureTop(node_info):
         infoSvc.InfoConnection = "ipc:///run/HLT2/MonInfo_0"
         infoSvc.OutPort = ports['InfoSvc']['out']
     infoSvc.RunDBConnection = runDBCon
-    infoSvc.OutputLevel = node_info.get('OutputLevel', 3)
+    infoSvc.OutputLevel = node_info.get('OutputLevel', 2)
 
     # The histogram adder service
     from Configurables import Hlt2AdderSvc
@@ -65,7 +66,8 @@ def configureTop(node_info):
         adderSvc.BackConnection = "ipc:///run/HLT2/Hlt2MonData_0"
     adderSvc.ConnectBack = False
     adderSvc.SendInterval = 210
-    adderSvc.OutputLevel = node_info.get('OutputLevel', 3)
+    adderSvc.ReceiveHighWaterMark = 200000
+    adderSvc.OutputLevel = node_info.get('OutputLevel', 2)
 
     # The root conversion service
     from Configurables import Hlt2RootPublishSvc
@@ -74,7 +76,7 @@ def configureTop(node_info):
     rootSvc.FrontConnection = adderSvc.BackConnection
     rootSvc.BackConnection = "ipc:///run/HLT2/MonData_2"
     rootSvc.InfoConnection = infoSvc.InfoConnection
-    rootSvc.OutputLevel = node_info.get('OutputLevel', 3)
+    rootSvc.OutputLevel = node_info.get('OutputLevel', 2)
 
     # The saver svc
     from Configurables import Hlt2SaverSvc
@@ -84,9 +86,10 @@ def configureTop(node_info):
     saverSvc.RunInfoType = "Moore2"
     saverSvc.DataConnection = rootSvc.BackConnection
     saverSvc.InfoConnection = infoSvc.InfoConnection
+    saverSvc.RegistrarConnection = 'tcp://hist01:31360'
     if 'HistogramDirectory' in node_info:
         saverSvc.BaseDirectory = node_info['HistogramDirectory']
-    saverSvc.OutputLevel = node_info.get('OutputLevel', 3)
+    saverSvc.OutputLevel = node_info.get('OutputLevel', 2)
 
     from Configurables import ZmqTransmitterSvc
     transmitter = ZmqTransmitterSvc()
@@ -97,7 +100,7 @@ def configureTop(node_info):
     for svc in svcs:
         svc.ForceTop = node_info['forced']
 
-    confs = svcs
+    confs = svcs + [transmitter]
     if runDB:
         confs += [runDBSvc]
     return extraConf(confs, node_info.get('extra', {}))
@@ -108,6 +111,8 @@ def configureSubfarm(node_info):
     ports = node_info['ports']
     connections = node_info.get('connections', {})
 
+    con_dir = connectionDirectory()
+
     # The info svc
     from Configurables import Hlt2MonInfoSvc
     infoSvc = Hlt2MonInfoSvc()
@@ -120,6 +125,7 @@ def configureSubfarm(node_info):
         infoSvc.InPort = ports['InfoSvc']['in']
         infoSvc.OutPort = ports['InfoSvc']['out']
     infoSvc.OutputLevel = node_info.get('OutputLevel', 3)
+    infoSvc.IPCConnectionPath = con_dir
 
     # The histogram adder service
     from Configurables import Hlt2AdderSvc
@@ -131,13 +137,19 @@ def configureSubfarm(node_info):
     else:
         adderSvc.FrontConnection = "tcp://*:%d" % ports['Adder']['in']
         adderSvc.BackConnection = 'tcp://hlt02:%d' % ports['Adder']['in']
-    adderSvc.SendInterval = 60
+    adderSvc.ReceiveHighWaterMark = 200000
+    adderSvc.SendInterval = 67
     adderSvc.OutputLevel = node_info.get('OutputLevel', 3)
 
     from Configurables import ZmqTransmitterSvc
-    ZmqTransmitterSvc().InfoPort = ports['Transmitter']
+    transmitter = ZmqTransmitterSvc()
+    transmitter.InfoPort = ports['Transmitter']
+    transmitter.OutputLevel = node_info.get('OutputLevel', 3)
+
+    transmitter.IPCConnectionPath = con_dir
 
-    return extraConf((infoSvc, adderSvc), node_info.get('extra', {}))
+    return extraConf((infoSvc, adderSvc, transmitter),
+                     node_info.get('extra', {}))
 
 
 def configureNode(node_info):
@@ -147,8 +159,8 @@ def configureNode(node_info):
 
     # Cleanup old connection sockets.
     # If there's no running PID that corresponds to the socket, delete it.
+    con_dir = connectionDirectory()
     pids = set([pid for pid in os.listdir('/proc') if pid.isdigit()])
-    con_dir = '/run/HLT2'
     cons = [c for c in os.listdir(con_dir)] if os.path.exists(con_dir) else []
     for con in cons:
         if con.split('_')[-1] in pids:
@@ -169,6 +181,7 @@ def configureNode(node_info):
         infoSvc.InPort = ports['InfoSvc']['in']
         infoSvc.OutPort = ports['InfoSvc']['out']
     infoSvc.OutputLevel = node_info.get('OutputLevel', 3)
+    infoSvc.IPCConnectionPath = con_dir
 
     # The histogram adder service
     from Configurables import Hlt2AdderSvc
@@ -178,19 +191,21 @@ def configureNode(node_info):
         for c, v in cons.iteritems():
             setattr(adderSvc, c + 'Connection', v)
     else:
-        adderSvc.FrontConnection = "ipc:///run/HLT2/MonData_0"
+        adderSvc.FrontConnection = "ipc://%s/MonData_0" % con_dir
         adderSvc.BackConnection = 'tcp://hlt%s:%d' % (
             node_info['subfarm'], ports['Adder']['in'])
 
-    adderSvc.SendInterval = 60
+    adderSvc.ReceiveHighWaterMark = 50000
+    adderSvc.SendInterval = 61
     adderSvc.OutputLevel = node_info.get('OutputLevel', 3)
 
     from Configurables import ZmqTransmitterSvc
     transmitter = ZmqTransmitterSvc()
     transmitter.InfoPort = ports['Transmitter']
     transmitter.OutputLevel = node_info.get('OutputLevel', 3)
+    transmitter.IPCConnectionPath = con_dir
 
-    return extraConf((infoSvc, adderSvc), node_info.get('extra', {}))
+    return extraConf((infoSvc, adderSvc, transmitter), node_info.get('extra', {}))
 
 
 def configure(host_type=None, directory=None, ports=None,
@@ -218,8 +233,7 @@ def configure(host_type=None, directory=None, ports=None,
         node_info['connections'] = connections
 
     hostname = socket.gethostname()
-    host_regex = re.compile(
-        r"hlt(0[12]|(?P<subfarm>[a-f]{1}[0-9]{2})(?P<node>[0-9]{2})?)")
+    host_regex = re.compile(__host_regex)
     r = host_regex.match(hostname)
     ht = ''
     if host_type and host_type in configs:
diff --git a/Online/Hlt2Monitoring/python/Hlt2Monitoring/Utilities.py b/Online/Hlt2Monitoring/python/Hlt2Monitoring/Utilities.py
index 9ebc8d4c5a775de3af4959e6d3f3f1d9d8052df1..5bcc585ff343b4835c791f3097717bcd3059db54 100644
--- a/Online/Hlt2Monitoring/python/Hlt2Monitoring/Utilities.py
+++ b/Online/Hlt2Monitoring/python/Hlt2Monitoring/Utilities.py
@@ -35,9 +35,34 @@ connections = {'Hlt2RootPublishSvc': {'back': "ipc:///run/HLT2/MonData_2"},
                'Hlt2MonInfoSvc': {'back': "ipc:///run/HLT2/MonInfo_1"},
                'Hlt2AdderSvc': {'back': "ipc:///run/HLT2/MonData_1"}}
 
-# The next few lines make the OnlineNodeEnv do nothig, we don't need it anyway.
+
+# Function to create the connection directory at configuration time
+def connectionDirectory():
+    con_dirs = ['/run/HLT2', '/tmp/HLT2']
+    con_dir = None
+    for cd in con_dirs:
+        if os.path.exists(cd):
+            con_dir = cd
+            break
+        else:
+            try:
+                os.makedirs(cd)
+                con_dir = cd
+                break
+            except OSError as e:
+                print '[WARNING]:', e
+                err = ('[WARNING]: Could not create {0}, '
+                       'falling back to {1}'.format(cd, con_dirs[-1]))
+                print err
+                continue
+    if not con_dir:
+        msg = ("Could not create any directory "
+               "to store connections: %s" % con_dirs)
+        raise RuntimeError(msg)
+    return con_dir
 
 
+# The next few lines make the OnlineNodeEnv do nothing, we don't need it anyway.
 class EmptyNodeEnv(object):
 
     def load_node_info(self):
@@ -107,6 +132,7 @@ def configOnline(appMgr, level):
 
     from Configurables import AuditorSvc
     AuditorSvc().Auditors = []
-    configMsgSvc(appMgr, 2 if level == 'top' else 3)
+    # configMsgSvc(appMgr, 2 if level == 'top' else 3)
+    configMsgSvc(appMgr, 2)
     OnlineEnv = importOnline()
     OnlineEnv.end_config(False)
diff --git a/Online/Hlt2Monitoring/scripts/test_node_adder.py b/Online/Hlt2Monitoring/scripts/test_node_adder.py
index 83739b3e2c8d49a00831097ccd3d74adfccf6200..08d4afddb532f0610568f43abc76635a646fe863 100644
--- a/Online/Hlt2Monitoring/scripts/test_node_adder.py
+++ b/Online/Hlt2Monitoring/scripts/test_node_adder.py
@@ -14,18 +14,19 @@ Hlt2Adder.configure('node',
                                              'Info' : 'ipc:///run/HLT2/MonInfoNode'},
                                    'Adder' : {'Front': 'ipc:///run/HLT2/MonData_0',
                                               'Back': 'ipc:///run/HLT2/MonData_1'}},
-                    extra = {'Hlt2MonInfoSvc' : {'SyncConnections' : ['ipc:///run/HLT2/MonInfoOtherNode'], 'OutputLevel' : 2, 'SyncInterval' : 60}, 'Hlt2AdderSvc' : {'OutputLevel' : 2}})"""
+                    extra = {'Hlt2MonInfoSvc' : {'SyncConnections' : ['ipc:///run/HLT2/MonInfoOtherNode'],
+                                                 'OutputLevel' : 1,
+                                                 'SyncInterval' : 60},
+                             'Hlt2AdderSvc' : {'OutputLevel' : 2},
+                             'ZmqTransmitterSvc' : {'OutputLevel' : 2}})"""
 
 # cmd = """import GaudiKernel.ProcessJobOptions
 # from Gaudi.Configuration import importOptions
 # GaudiKernel.ProcessJobOptions.printing_level=3
 # from Hlt2Monitoring import Hlt2Adder
-# Hlt2Adder.configure('node', ports = {'Adder' : {'in' : '41347', 'out' :41351}, 'InfoSvc' : {'in' : 41348, 'out' : 41352}},
-#                     connections = {'Info' : {'Front' : 'ipc:///run/HLT2/MonInfo_0', 'Info' : 'tcp://*:41350', 'Back' : 'tcp://hltd04:41348'},
-#                                    'Adder' : {'Front' : 'ipc:///run/HLT2/MonData_0', 'Back' : 'tcp://hltd04:41347', 'Info' : 'tcp://*:41351'}},
-# extra = {'Hlt2MonInfoSvc' : {'SyncConnections' : ['tcp://hltd04:41350'],
-# 'OutputLevel' : 2, 'SyncInterval' : 60}, 'Hlt2AdderSvc' : {'OutputLevel'
-# : 2}})"""
+# Hlt2Adder.configure(extra = {'Hlt2MonInfoSvc' : {'OutputLevel' : 2},
+#                              'Hlt2AdderSvc' : {'OutputLevel' : 2},
+#                              'ZmqTransmitterSvc' : {'OutputLevel' : 2}})"""
 
 mon_root = os.environ['HLT2MONITORINGROOT']
 os.environ['DIM_DNS_NODE'] = 'localhost'
diff --git a/Online/Hlt2Monitoring/scripts/test_online_adder.py b/Online/Hlt2Monitoring/scripts/test_online_adder.py
index 6f624e82754e3f2495fc3b583660e5091c5ad513..b05dc87b7bf4dfc166506ccdadec70c3373c6a67 100644
--- a/Online/Hlt2Monitoring/scripts/test_online_adder.py
+++ b/Online/Hlt2Monitoring/scripts/test_online_adder.py
@@ -14,13 +14,17 @@ Hlt2Adder.configure('top', '/tmp/histograms',
                                              'Info' : 'ipc:///run/HLT2/MonInfoTop'},
                                    'Adder' : {'Front' : 'ipc:///run/HLT2/MonData_1',
                                               'Back' : 'ipc:///run/HLT2/AddData_2'},
-                                   'RunDB' : 'ipc:///tmp/testRunDB'},
+                                   'RunDB' : 'ipc:///run/HLT2/testRunDB'},
                     extra = {'Hlt2MonInfoSvc' : {'SyncConnections' : ['ipc:///run/HLT2/MonInfoNode', 'ipc:///run/HLT2/MonInfoOtherNode'],
                                                  'SyncInterval' : 20,
                                                  'OutputLevel' : 2},
                              'Hlt2AdderSvc' : {'OutputLevel' : 2},
-                             'Hlt2RootPublishSvc' : {'OutputLevel' : 2},
+                             'Hlt2RootPublishSvc' : {'OutputLevel' : 2,
+                                                     'BatchSize' : 50,
+                                                     'HighWaterMark' : 200,
+                                                     'PublishInterval' : 50},
                              'Hlt2SaverSvc' : {'OutputLevel' : 2,
+                                               'RunInfoPollTimeout' : 1,
                                                'RunInfoType' : 'Moore2',
                                                'SaveInterval' : 30,
                                                'NWorkers' : 5}})"""
@@ -29,13 +33,17 @@ Hlt2Adder.configure('top', '/tmp/histograms',
 # from Gaudi.Configuration import importOptions
 # GaudiKernel.ProcessJobOptions.printing_level=3
 # from Hlt2Monitoring import Hlt2Adder
-# Hlt2Adder.configure('top', '/tmp/histograms',
-#                     ports = {'Adder' : {'in' : 41347, 'out' :41349}, 'InfoSvc' : {'in' : 41348, 'out' : 41349}},
-#                     connections = {'Info' : {'Front' : 'tcp://*:41348', 'Info' : 'ipc:///run/HLT2/MonInfoTop'},
-#                                    'Adder' : {'Front' : 'tcp://*:41347', 'Back' : 'ipc:///run/HLT2/AddData_2', 'Info' : 'ipc:///run/HLT2/AdderInfo'},
-#                                    'RunDB' : 'ipc:///tmp/HLT2/TestRunDB'},
-# extra = {'Hlt2MonInfoSvc' : {'SyncConnections' : ['tcp://hltd04:41350'],
-# 'SyncInterval' : 20}})"""
+# Hlt2Adder.configure('top', '/tmp/hlt2histograms',
+#                     connections = {'RunDB' : 'ipc:///run/HLT2/testRunDB'},
+#                     extra = {'Hlt2MonInfoSvc' : {'OutputLevel' : 2},
+#                               'Hlt2AdderSvc' : {'OutputLevel' : 2},
+#                               'Hlt2RootPublishSvc' : {'OutputLevel' : 2},
+#                               'Hlt2SaverSvc' : {'OutputLevel' : 2,
+#                                                 'RunInfoPollTimeout' : 1,
+#                                                 'RunInfoType' : 'Moore2',
+#                                                 'SaveInterval' : 50,
+#                                                 'NWorkers' : 5}})"""
+
 os.environ['DIM_DNS_NODE'] = 'hlt01'
 env = {'LC_ALL': 'C', 'UTGID': utgid, 'TEMPDIR': '/tmp/testAdder', 'PARTITION': 'LHCb2',
        'PARTITION_NAME': 'LHCb2', 'RUNINFO': '%s/scripts/OnlineEnvBase.py' % os.environ['HLT2MONITORINGROOT']}
diff --git a/Online/Hlt2Monitoring/scripts/test_subfarm_adder.py b/Online/Hlt2Monitoring/scripts/test_subfarm_adder.py
index 8d6c5ad26a936424e8d5a5c7c8fe31252934430d..a13e22579c016dc1fa026ffcfeff5c36cf5eeaea 100644
--- a/Online/Hlt2Monitoring/scripts/test_subfarm_adder.py
+++ b/Online/Hlt2Monitoring/scripts/test_subfarm_adder.py
@@ -1,5 +1,5 @@
 import os
-from Manager import Manager
+from Hlt2Monitoring.Manager import Manager
 
 utgid = 'TEST_SUBFARMMADDER_00'
 
@@ -7,9 +7,9 @@ cmd = """import GaudiKernel.ProcessJobOptions
 from Gaudi.Configuration import importOptions
 GaudiKernel.ProcessJobOptions.printing_level=3
 from Hlt2Monitoring import Hlt2Adder
-Hlt2Adder.configure(ports = {'Adder' : {'in' : 41347, 'out' :41349},
-                             'InfoSvc' : {'in' : 41351, 'out' : 41352},
-                             'Transmitter' : 41339})
+Hlt2Adder.configure(extra = {'Hlt2MonInfoSvc' : {'OutputLevel' : 2},
+                             'Hlt2AdderSvc' : {'OutputLevel' : 2},
+                             'ZmqTransmitterSvc' : {'OutputLevel' : 2}})
 """
 
 """import GaudiKernel.ProcessJobOptions
diff --git a/Online/Hlt2Monitoring/scripts/test_sync_adder.py b/Online/Hlt2Monitoring/scripts/test_sync_adder.py
index 5874ef1eacc9b2e4fb3ae944b6d602dc75d14a8d..35984aba15722b69a6dcb8f1722b28277e24123d 100644
--- a/Online/Hlt2Monitoring/scripts/test_sync_adder.py
+++ b/Online/Hlt2Monitoring/scripts/test_sync_adder.py
@@ -18,9 +18,10 @@ Hlt2Adder.configure('subfarm',
                                              'Back' : 'ipc:///run/HLT2/MonInfoOther_1'},
                                    'Adder' : {'Front' : 'ipc:///run/HLT2/MonDataOther_0',
                                               'Back' : 'ipc:///run/HLT2/MonDataOther_1'}},
-                    extra = {'Hlt2MonInfoSvc' : {'SyncConnections' : ['ipc:///run/HLT2/MonInfoNode'],
-                             'OutputLevel' : 2,
-                             'SyncInterval' : 60}})"""
+                    extra = {'Hlt2MonInfoSvc' : {'OutputLevel' : 2,
+                                                 'OutPort' : 31352,
+#                                                'SyncConnections' : ['ipc:///run/HLT2/MonInfoNode'],
+                                                 'SyncInterval' : 60}})"""
 
 os.environ['DIM_DNS_NODE'] = 'localhost'
 mon_root = os.environ['HLT2MONITORINGROOT']
diff --git a/Online/Hlt2Monitoring/src/component/Hlt2AdderSvc.cpp b/Online/Hlt2Monitoring/src/component/Hlt2AdderSvc.cpp
index eb5323052def047bdde59019df6be5c76e3b89e6..862e0f5f7f5b872826aa238c3574a31e5d0ea7fc 100644
--- a/Online/Hlt2Monitoring/src/component/Hlt2AdderSvc.cpp
+++ b/Online/Hlt2Monitoring/src/component/Hlt2AdderSvc.cpp
@@ -54,11 +54,14 @@ DECLARE_SERVICE_FACTORY(Hlt2AdderSvc)
 
 //=============================================================================
 Hlt2AdderSvc::Hlt2AdderSvc(const string& name, ISvcLocator* loc)
- : Hlt2MonBaseSvc(name, loc),
-   m_stopSending{false}
+ : Hlt2MonBaseSvc(name, loc)
 {
    declareProperty("SendInterval", m_sendInterval = 10);
    declareProperty("ConnectBack", m_connectBack = true);
+   declareProperty("PublishInterval", m_publishInterval = 60);
+   declareProperty("BatchSize", m_batchSize = 500);
+   declareProperty("HighWaterMark", m_hwm = 5000);
+   declareProperty("ReceiveHighWaterMark", m_rcvHwm = 200000);
 }
 
 //===============================================================================
@@ -104,6 +107,7 @@ void Hlt2AdderSvc::function() {
    front.bind(m_frontCon.c_str());
    setsockopt(front, zmq::LINGER, 0);
    zmq::setsockopt(front, zmq::SUBSCRIBE, "");
+   zmq::setsockopt(front, zmq::RCVHWM, boost::numeric_cast<int>(m_rcvHwm));
    info() << "Bound frontend to: " << m_frontCon << endmsg;
 
    // Clean up queue
@@ -153,36 +157,36 @@ void Hlt2AdderSvc::function() {
    publish.bind(pubCon().c_str());
 
    // Start thread to trigger publication of histograms
-   std::thread pubThread([this] { periodic(pubCon(), Monitoring::s_Publish,
-                                           m_stopSending, m_sendInterval); });
+   std::thread pubThread([this] { periodic(pubCon(), Monitoring::s_Publish, 0.5); });
+   size_t pubCounter = m_publishInterval * 2;
+   size_t counterPubCounter = m_publishInterval * 2;
 
    //  Initialize poll set
    zmq::pollitem_t items[] = {{control, 0, ZMQ_POLLIN, 0},
                               {front, 0, ZMQ_POLLIN, 0},
                               {publish, 0, ZMQ_POLLIN, 0}};
 
-   bool paused = false;
+   // Storage
+   Histograms histograms;
+   Counters counters;
+   Queue messages;
+   std::vector<Key> toSend;
 
    while (true) {
       //  Process messages from both sockets
       zmq::message_t message;
 
-      if (!paused) zmq::poll(&items[0], 3, -1);
+      zmq::poll(&items[0], 3, -1);
 
-      if (paused || (items[0].revents & ZMQ_POLLIN)) {
+      if (items[0].revents & ZMQ_POLLIN) {
          auto cmd = receive<std::string>(control);
          if (cmd == Monitoring::s_Terminate) {
+            send(publish, Monitoring::s_Terminate);
             break;
-         } else if (cmd == "PAUSE") {
-            debug() << name() << " paused." << endmsg;
-            paused = true;
-         } else if (cmd == "RESUME") {
-            debug() << name() << " resumed." << endmsg;
-            paused = false;
          }
       }
 
-      if (!paused && (items[1].revents & ZMQ_POLLIN)) {
+      if (items[1].revents & ZMQ_POLLIN) {
          // Deserialize
          auto msg = receive<zmq::message_t>(front);
          auto type = decode<string>(msg);
@@ -196,11 +200,11 @@ void Hlt2AdderSvc::function() {
                continue;
             }
 
-            key_t key{hdiff.runNumber, hdiff.histId};
+            Key key{hdiff.runNumber, hdiff.histId};
             // Add to internal store
-            auto it = m_histograms.find(key);
-            if (it == end(m_histograms)) {
-               auto r = m_histograms.emplace(std::move(key), HistDiff{m_sourceID, hdiff.runNumber, hdiff.histId});
+            auto it = histograms.find(key);
+            if (it == end(histograms)) {
+               auto r = histograms.emplace(std::move(key), HistDiff{m_sourceID, hdiff.runNumber, hdiff.histId});
                assert(r.second);
                it = r.first;
             }
@@ -209,39 +213,39 @@ void Hlt2AdderSvc::function() {
             auto start = receive<long>(front);
             auto last = receive<long>(front);
             auto key = make_pair(start, last);
-            std::map<size_t, StatEntity> counters;
+            std::map<size_t, StatEntity> recvCounters;
             msg = receive<zmq::message_t>(front);
             try {
-               counters = decode<decltype(counters)>(msg);
+               recvCounters = decode<decltype(recvCounters)>(msg);
             } catch (boost::archive::archive_exception) {
                warning() << "Faulty CounterDiff, ignoring " << endmsg;
                continue;
             }
 
             // Check if there is a set of counters with
-            auto it = m_counters.find(key);
-            if (it == end(m_counters)) {
-               auto count = begin(m_counters);
-               if (!m_counters.empty() && count->first.first > last) {
+            auto it = counters.find(key);
+            if (it == end(counters)) {
+               auto count = begin(counters); // only dereferenced below when the store is non-empty
+               if (!counters.empty() && count->first.first > last) {
                   warning() << "Received counters for time [" << start << ", "
                             << last << "), which is before first stored counter ["
                             << count->first.first << ", " << count->first.second << endmsg;
                } else {
-                  auto r = m_counters.emplace(key, std::move(counters));
+                  auto r = counters.emplace(key, std::move(recvCounters));
                   if (!r.second) assert(false);
                }
             } else {
                auto& storedCounters = it->second;
                // Check consistency
                ++it;
-               if (it != end(m_counters) && it->first.first != last) {
+               if (it != end(counters) && it->first.first != last) {
                   warning() << "There is a gap in the set of counters: " << start << " "
                             << last << " " << it->first.first << endmsg;
                } else {
-                  debug() << "Received " << counters.size() << " counters for time interval ["
+                  debug() << "Received " << recvCounters.size() << " counters for time interval ["
                           << start << ", " << last << ")." << endmsg;
                }
-               for (const auto& entry : counters) {
+               for (const auto& entry : recvCounters) {
                   Monitoring::HistId counterKey = entry.first;
                   // Add to internal store
                   auto counterIt = storedCounters.find(counterKey);
@@ -266,71 +270,120 @@ void Hlt2AdderSvc::function() {
 
       }
 
-      if (!paused && items[2].revents & ZMQ_POLLIN) {
+      if (items[2].revents & ZMQ_POLLIN) {
          auto cmd = receive<std::string>(publish);
          if (cmd == Monitoring::s_Publish) {
-            // If there is no separate counter socket, use the hist
-            // socket. This happens if the back connection is bound
-            // instead of connected.
-            auto n = (publishHistDiffs(*histOut, histID) +
-                      publishCounterDiffs(counterOut ? *counterOut : *histOut,
-                                          counterID));
-            for (const auto& id : {histID, counterID}) {
+            // Tick both countdowns (histograms and counters); they saturate at zero.
+            if (pubCounter > 0) --pubCounter;
+            if (counterPubCounter > 0) --counterPubCounter;
+
+            if (pubCounter == 0 && messages.empty()
+                && toSend.empty() && !histograms.empty()) {
+               toSend.reserve(histograms.size());
+               for (const auto& entry : histograms) {
+                  toSend.push_back(entry.first);
+               }
+               pubCounter = m_publishInterval * 2;
+            }
+
+            if (messages.empty() && !toSend.empty()) {
+               while (messages.size() < m_hwm && !toSend.empty()) {
+                  const auto& key = toSend.back();
+                  auto it = histograms.find(key);
+                  if (it != end(histograms)) {
+                     messages.emplace_back(zmq().encode(it->second));
+                     histograms.erase(it);
+                  } else {
+                     warning() << "Requested to send key " << key.first << " " << key.second
+                               << ", but it is not in histograms." << endmsg;
+                  }
+                  toSend.pop_back();
+               }
+               debug() << "Created " << messages.size() << " messages " << endmsg;
+            }
+
+            if (!messages.empty()){
+               // Ask the TransmitterSvc how many messages we can send.
+               // If we've bound our output socket, use the batch size
+               // property until we can talk to the publish service.
+               size_t n = 0;
+               if (m_connectBack) {
+                  send(*histOut, Monitoring::s_Check, zmq::SNDMORE);
+                  send(*histOut, Monitoring::s_HistDiff, zmq::SNDMORE);
+                  send(*histOut, *histID);
+
+                  zmq::pollitem_t checkItems[] = {{*histOut, 0, ZMQ_POLLIN, 0}};
+                  zmq::poll(&checkItems[0], 1, 500);
+                  if (checkItems[0].revents & ZMQ_POLLIN) {
+                     n = receive<size_t>(*histOut);
+                     n = std::min(messages.size(), n);
+                  } else {
+                     warning() << "Poll for credit to transmitter service timed out." << endmsg;
+                  }
+               } else {
+                  n = std::min(messages.size(), m_batchSize);
+               }
+
+               if (UNLIKELY(msgLevel(MSG::VERBOSE))) {
+                  verbose() << "Sending " << n << " messages " << endmsg;
+               }
+
+               for (size_t m = 0; m < n; ++m) {
+                  auto& message = messages.front();
+
+                  // Send the message
+                  send(*histOut, Monitoring::s_HistDiff, zmq::SNDMORE);
+                  if (histID) send(*histOut, *histID, zmq::SNDMORE);
+                  send(*histOut, message);
+
+                  messages.pop_front();
+               }
+               // Don't trigger too often
+               if (histID && m_connectBack
+                   && (pubCounter % m_publishInterval) == 0) {
+                  m_transmitter->trigger(*histID);
+               }
+            }
+
+            if (UNLIKELY(msgLevel(MSG::VERBOSE)) && toSend.empty()) {
+               verbose() << "No messages to send." << endmsg;
+            }
+
+            // Publish counters
+            if (counterPubCounter == 0 && !counters.empty()) {
+               // If there is no separate counter socket, use the hist
+               // socket. This happens if the back connection is bound
+               // instead of connected.
+               auto n = publishCounterDiffs(counterOut ? *counterOut : *histOut,
+                                            counters, counterID);
 #if !defined(__clang__)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
 #endif
-               // This line results in a false-positive warning from
-               // some gcc versions on some builds.
-               if (id && m_connectBack && (n != 0)) m_transmitter->trigger(*id);
+               if (counterID && m_connectBack && (n != 0)) m_transmitter->trigger(*counterID);
 #if !defined(__clang__)
 #pragma GCC diagnostic pop
 #endif
+               counterPubCounter = m_publishInterval * 2;
             }
          }
       }
    }
 
-   m_stopSending = true;
    pubThread.join();
 }
 
-//===============================================================================
-size_t Hlt2AdderSvc::publishHistDiffs(zmq::socket_t& socket,
-                                      boost::optional<size_t> id) const {
-   size_t n = 0;
-
-   for (auto it = begin(m_histograms), last = end(m_histograms); it != last;) {
-      if (it->second.binDiffs.empty()) {
-         verbose() << "Pruning histogram " << it->first.first << " " << it->first.second << endmsg;
-         it = m_histograms.erase(it);
-      } else {
-         // Serialize
-         send(socket, Monitoring::s_HistDiff, zmq::SNDMORE);
-         if (id) send(socket, *id, zmq::SNDMORE);
-         send(socket, it->second);
-
-         // Zero out histogram
-         it->second.binDiffs.clear();
-         ++it;
-         ++n;
-      }
-   }
-
-   debug() << "Published " << n << " histogram diffs" << endmsg;
-   return n;
-}
-
 //===============================================================================
 size_t Hlt2AdderSvc::publishCounterDiffs(zmq::socket_t& socket,
+                                         Counters& counters,
                                          boost::optional<size_t> id) const {
    size_t n = 0;
 
-   auto nRemove = m_counters.size() > 5 ? m_counters.size() - 5 : 0;
-   for (auto it = begin(m_counters), last = end(m_counters); it != last;) {
+   auto nRemove = counters.size() > 5 ? counters.size() - 5 : 0;
+   for (auto it = begin(counters), last = end(counters); it != last;) {
       if (nRemove != 0) {
          verbose() << "Pruning counters at [" << it->first.first << ", " << it->first.second << ")" << endmsg;
-         it = m_counters.erase(it);
+         it = counters.erase(it);
          --nRemove;
       } else {
          // Serialize
diff --git a/Online/Hlt2Monitoring/src/component/Hlt2AdderSvc.h b/Online/Hlt2Monitoring/src/component/Hlt2AdderSvc.h
index 39e165734833f7d5638a3d040f43e71d12f6bc8c..8833dead523dbf3d5d1129d6539380d4046f0b22 100644
--- a/Online/Hlt2Monitoring/src/component/Hlt2AdderSvc.h
+++ b/Online/Hlt2Monitoring/src/component/Hlt2AdderSvc.h
@@ -31,19 +31,24 @@
  */
 class Hlt2AdderSvc : public Hlt2MonBaseSvc {
 public:
+
+   using Key = std::pair<Monitoring::RunNumber, Monitoring::HistId>;
+   using Histograms = boost::unordered_map<Key, Monitoring::HistDiff>;
+   using Counters = std::map<std::pair<long, long>, std::map<Monitoring::HistId, StatEntity>>;
+   using Queue = std::deque<zmq::message_t>;
+
    /// Standard constructor
    Hlt2AdderSvc(const std::string& name, ISvcLocator* sl);
 
    StatusCode initialize() override;
-   
+
    // The function that does the work
    void function() override;
 
 private:
 
-   size_t publishHistDiffs(zmq::socket_t& socket,
-                           boost::optional<size_t> id) const;
    size_t publishCounterDiffs(zmq::socket_t& socket,
+                              Counters& counters,
                               boost::optional<size_t> id) const;
 
    std::string pubCon() const {
@@ -56,14 +61,13 @@ private:
    bool m_connectBack;
    boost::optional<zmq::socket_t> m_histOut;
    boost::optional<zmq::socket_t> m_counterOut;
-   
+   unsigned int m_publishInterval;
+   size_t m_batchSize;
+   size_t m_hwm;
+   size_t m_rcvHwm;
+
    // data members
    unsigned int m_sourceID;
-   std::atomic<bool> m_stopSending;
-
-   using key_t = std::pair<Monitoring::RunNumber, Monitoring::HistId>;
-   mutable boost::unordered_map<key_t, Monitoring::HistDiff> m_histograms;
-   mutable std::map<std::pair<long, long>, std::map<Monitoring::HistId, StatEntity>> m_counters;
 
    // Job Info
    using JobKey = std::tuple<unsigned int, std::string, std::string, std::string>;
diff --git a/Online/Hlt2Monitoring/src/component/Hlt2MonBaseSvc.cpp b/Online/Hlt2Monitoring/src/component/Hlt2MonBaseSvc.cpp
index 5506b5e2c8b15af57b2a63e1a907c4604dbb7f67..13edfc17bb896098f57a00ee8e70fada71d426f6 100644
--- a/Online/Hlt2Monitoring/src/component/Hlt2MonBaseSvc.cpp
+++ b/Online/Hlt2Monitoring/src/component/Hlt2MonBaseSvc.cpp
@@ -56,7 +56,7 @@ Hlt2MonBaseSvc::Hlt2MonBaseSvc(const string& name, ISvcLocator* loc, bool bindCo
    declareProperty("ForceTop", m_forceTop = false);
    declareProperty("PartitionName", m_partition);
    declareProperty("RunInPartitions", m_partitions = {"LHCb2"});
-   declareProperty("HostnameRegex", m_hostRegex = "hlt(0[12]|(?<subfarm>[a-f][0-9]{2})(?<node>[0-9]{2})?)");
+   declareProperty("HostnameRegex", m_hostRegex = "^hlt(0[12]|(?<subfarm>[a-f][0-9]{2})(?<node>[0-9]{2})?).*");
    declareProperty("CheckInterval", m_checkInterval = 10);
 }
 
@@ -201,20 +201,32 @@ Hlt2MonBaseSvc::receiveRunAndId(zmq::socket_t& socket, bool* more) const
 
 //===============================================================================
 void Hlt2MonBaseSvc::periodic(const std::string& connection, std::string message,
-                              const std::atomic<bool>& stop, const unsigned int interval) {
+                              const double interval) {
    zmq::socket_t publish = socket(zmq::PAIR);
    zmq::setsockopt(publish, zmq::LINGER, 0);
    publish.connect(connection.c_str());
 
-   while (!stop) {
-      unsigned int n = 0;
-      while (n < interval) {
-         if (stop) break;
-         std::chrono::seconds one{1};
-         std::this_thread::sleep_for(one);
-         ++n;
+   zmq::pollitem_t items [] = {
+      { publish, 0, ZMQ_POLLIN, 0 },
+   };
+
+   auto iv = boost::numeric_cast<long>(1000 * interval);
+   if (UNLIKELY(msgLevel(MSG::DEBUG))) {
+      debug() << "Sending " << message << " messages with interval "
+              << iv << " ms." << endmsg;
+   }
+   while (true) {
+      zmq::poll(&items[0], 1, iv);
+      if (items[0].revents & ZMQ_POLLIN) {
+         auto msg = zmq().receive<string>(publish);
+         if (msg == Monitoring::s_Terminate) {
+            break;
+         } else {
+            warning() << "Publish thread got unknown message: "
+                      << msg << endmsg;
+         }
       }
-      if (!stop) send(publish, message);
+      send(publish, message);
    }
 }
 
@@ -291,7 +303,7 @@ bool Hlt2MonBaseSvc::checkRequest(Monitoring::JobInfo jobInfo) const
    try {
       check.connect(jobInfo.connection.c_str());
    } catch (const zmq::error_t& e) {
-      error() << "Failed to connect to " << jobInfo.connection 
+      error() << "Failed to connect to " << jobInfo.connection
               << " to respond to a check request." << endmsg;
       m_jobInfo.erase(key);
       return false;
diff --git a/Online/Hlt2Monitoring/src/component/Hlt2MonBaseSvc.h b/Online/Hlt2Monitoring/src/component/Hlt2MonBaseSvc.h
index a895d22ea1aa7f31819c05ef4db6114e385d1f83..1bb091e7d8e59bc56b9cd4a003bba368532974ba 100644
--- a/Online/Hlt2Monitoring/src/component/Hlt2MonBaseSvc.h
+++ b/Online/Hlt2Monitoring/src/component/Hlt2MonBaseSvc.h
@@ -94,9 +94,9 @@ protected:
    void enable()  { m_enabled = true; }
 
 
-   void periodic(const std::string& connection, std::string message,
-                 const std::atomic<bool>& stop,
-                 const unsigned int interval);
+   void periodic(const std::string& connection,
+                 std::string message,
+                 const double interval);
 
    bool checkRequest(Monitoring::JobInfo jobInfo) const;
 
diff --git a/Online/Hlt2Monitoring/src/component/Hlt2MonInfoSvc.cpp b/Online/Hlt2Monitoring/src/component/Hlt2MonInfoSvc.cpp
index 526fe94d225b2d31ef6e3b6bd1015a205588a06e..e3876708acf1ce45e241f92fdf23d993d1515cc7 100644
--- a/Online/Hlt2Monitoring/src/component/Hlt2MonInfoSvc.cpp
+++ b/Online/Hlt2Monitoring/src/component/Hlt2MonInfoSvc.cpp
@@ -10,6 +10,7 @@
 // boost
 #include <boost/regex.hpp>
 #include <boost/functional/hash.hpp>
+#include <boost/optional.hpp>
 
 // zeromq
 #include "zmq/zmq.hpp"
@@ -29,6 +30,7 @@
 #include <Hlt2Monitoring/Histo1DDef.h>
 #include <Hlt2Monitoring/Histo2DDef.h>
 #include <Hlt2Monitoring/Utilities.h>
+#include <Hlt2Monitoring/InfoUtils.h>
 
 // Local
 #include "Hlt2MonInfoSvc.h"
@@ -57,6 +59,7 @@ namespace {
    using std::unordered_set;
 
    using Monitoring::hostname;
+   using Monitoring::HistoKey;
    using Monitoring::HistoKeys;
    using Monitoring::HistoPub;
    using Monitoring::HistoMap;
@@ -71,6 +74,11 @@ namespace {
    using Monitoring::Histo2DDef;
    using Monitoring::CounterDef;
 
+   using MonInfo::ByKey;
+   using MonInfo::ByContent;
+   using MonInfo::HistoVariant;
+   using MonInfo::HistoEntry;
+
 }
 
 // Factory for instantiation of service objects
@@ -91,12 +99,10 @@ Hlt2MonInfoSvc::Hlt2MonInfoSvc(const string& name, ISvcLocator* loc)
    declareProperty("RunDBRetires", m_runDBRetries = 5);
    declareProperty("SendInterval", m_sendInterval = 5);
    declareProperty("ForceSourceID", m_sourceID = 0);
-}
-
-//===============================================================================
-Hlt2MonInfoSvc::~Hlt2MonInfoSvc()
-{
-
+   declareProperty("IPCConnectionPath", m_connectionPath = "/run/HLT2");
+   declareProperty("SyncBatchSize", m_syncBatchSize = 100);
+   declareProperty("SyncTries", m_syncTries = 5);
+   declareProperty("LoadFrom", m_inputFile);
 }
 
 //===============================================================================
@@ -148,9 +154,9 @@ StatusCode Hlt2MonInfoSvc::initialize()
          m_randomSync = m_syncConnections.empty();
       } else {
          // node
-         m_frontCon = string("ipc:///run/HLT2/MonInfo_0");
-         m_infoCon  = string("tcp://*:") + to_string(m_outPort);
-         m_backCon  = string("tcp://hlt") + matches.str("subfarm") + ":" + to_string(m_inPort);
+         m_frontCon = string{"ipc://"} + m_connectionPath + "/MonInfo_0";
+         m_infoCon  = string{"tcp://*:"} + to_string(m_outPort);
+         m_backCon  = string{"tcp://hlt"} + matches.str("subfarm") + ":" + to_string(m_inPort);
       }
    } else {
       fatal() << "Could not determine hostname." << endmsg;
@@ -172,6 +178,11 @@ StatusCode Hlt2MonInfoSvc::initialize()
 
    // Sort subfarms, lexically is good enough
    std::sort(begin(m_subfarms), end(m_subfarms));
+
+   // Remove ourselves from the list of subfarms.
+   auto it = std::find(begin(m_subfarms), end(m_subfarms), m_hostname);
+   if (it != end(m_subfarms)) m_subfarms.erase(it);
+
    return sc;
 }
 
@@ -182,7 +193,7 @@ void Hlt2MonInfoSvc::synchroniser()
    zmq::setsockopt(syncer, zmq::LINGER, 0);
    syncer.connect(syncCon().c_str());
 
-   auto hostDown = [this](const string& con) -> bool {
+   auto hostUp = [this](const string& con) -> bool {
       zmq::socket_t ping = socket(zmq::REQ);
       zmq::setsockopt(ping, zmq::LINGER, 0);
       zmq::setsockopt(ping, zmq::RCVTIMEO, 100);
@@ -205,46 +216,17 @@ void Hlt2MonInfoSvc::synchroniser()
       if (UNLIKELY(msgLevel(MSG::DEBUG)) && r.empty()) {
          debug() << con << " is down." << endmsg;
       }
-      return r.empty();
+      return !r.empty();
    };
 
-   auto checkHosts = [&hostDown](decltype(m_syncConnections)& cons) {
-      for (auto it = begin(cons), last = end(cons); it != last;) {
-         if (hostDown(*it)) {
-            it = cons.erase(it);
-         } else {
-            ++it;
-         }
-      }
-   };
-
-   int i = 0;
-   map<size_t, string> subfarms;
-   for (auto s : m_subfarms)
-      subfarms[i++] = s;
-
    std::mt19937 gen{std::hash<string>{}(m_hostname)};
 
-   auto genHosts = [&subfarms, &gen, this](const size_t n) {
-      auto connection = [this](const string& host) {
-         return string{"tcp://"} + host + ":" + to_string(m_outPort);
-      };
-      std::uniform_int_distribution<size_t> sf{0, m_subfarms.size() - 1};
-
-      while (m_syncConnections.size() < n) {
-         auto it = subfarms.find(sf(gen));
-         if (it == end(subfarms)) continue;
-         if (it->second == m_hostname) continue;
-         m_syncConnections.emplace(connection(it->second));
-      }
-   };
-
    auto sleep_for = [this](const unsigned int m) {
       unsigned int n = 0;
-      while (n < m) {
+      while (n < 10 * m) {
          if (m_stopSync) break;
-         std::chrono::seconds one{1};
-         std::this_thread::sleep_for(one);
+         std::chrono::milliseconds hundred{100};
+         std::this_thread::sleep_for(hundred);
          ++n;
       }
    };
@@ -255,19 +237,37 @@ void Hlt2MonInfoSvc::synchroniser()
    decltype(m_syncConnections) connections{m_syncConnections};
 
    while (!m_stopSync) {
-      if (!m_randomSync) {
-         m_syncConnections = connections;
-      }
-
-      checkHosts(m_syncConnections);
       if (m_randomSync) {
-         genHosts(std::min(m_subfarms.size(), size_t{m_sync}));
+         auto subfarms = m_subfarms;
+         std::mt19937 gen{std::hash<string>{}("dump_info")};
+
+         auto connection = [this](const string& host) -> string {
+            return string{"tcp://"} + host + ":" + to_string(m_outPort);
+         };
+
+         // Draw hosts without replacement; tried hosts are swapped past 'remaining'.
+         auto remaining = subfarms.size();
+         while (connections.size() < m_sync && remaining > 0) {
+            std::uniform_int_distribution<size_t> pick{0, remaining - 1};
+            auto it = subfarms.begin() + pick(gen);
+            auto con = connection(*it);
+
+            if (hostUp(con)) {
+               connections.emplace(con);
+            }
+            std::swap(*it, subfarms[--remaining]);
+         }
+
+      } else {
+         std::copy_if(begin(m_syncConnections), end(m_syncConnections),
+                      std::inserter(connections, connections.end()), hostUp);
       }
 
-      if (m_syncConnections.empty()) {
+      if (connections.empty()) {
          debug() << "Could not find any hosts to synchronise info with." << endmsg;
       } else {
-         send(syncer, Monitoring::s_Sync);
+         send(syncer, Monitoring::s_Sync, zmq::SNDMORE);
+         send(syncer, connections);
       }
       sleep_for(m_syncInterval);
    }
@@ -300,117 +300,130 @@ void Hlt2MonInfoSvc::runDB() const
    using Pair = std::pair<Monitoring::RunNumber, string>;
    std::unordered_set<Pair, boost::hash<Pair>> runs;
    std::unordered_map<Monitoring::RunNumber, boost::optional<double>> deadtimes;
+   std::unordered_set<Pair, boost::hash<Pair>> toCheck;
 
    bool done = false;
 
    while (!done) {
 
-      bool request = false;
-      Monitoring::RunNumber run = 0;
-      size_t n = 0;
-      for (const auto& entry : deadtimes) {
-         n += bool(entry.second);
-      }
-      if (runs.size() && (runs.size() != n)) {
-         for (auto r : runs) {
-            auto it = deadtimes.find(r.first);
-            if (it != end(deadtimes) && bool(it->second)) {
-               continue;
-            }
-            if (UNLIKELY(msgLevel(MSG::VERBOSE)))
-               verbose() << "Requesting info for run: " << r.first << " "
-                         << r.second << endmsg;
-            send(rdb, Monitoring::s_RunInfo, zmq::SNDMORE);
-            send(rdb, r.first, zmq::SNDMORE);
-            send(rdb, r.second);
-            request = true;
-            run = r.first;
-            break;
+      toCheck.clear();
+
+      // Find the runs for which we have no deadtime yet.
+      for (const auto& entry : runs) {
+         auto it = deadtimes.find(entry.first);
+         if ((it == end(deadtimes) || !(it->second))) {
+            toCheck.emplace(entry);
          }
       }
 
-      size_t tries = 0;
-      bool reply = false;
-      while (tries < m_runDBRetries) {
-         bool other = false;
+      auto checkIt = begin(toCheck);
 
-         zmq::pollitem_t items[] = {
-            {rdb, 0, ZMQ_POLLIN, 0},
-            {rep, 0, ZMQ_POLLIN, 0},
-         };
+      // We need to keep track of whether a request has been sent already,
+      // so we don't send another one if we receive a new run while waiting
+      // for the reply on a previous request.
+      bool request = false;
 
-         //  Poll socket for a reply, with timeout if we are waiting for some runs
-         zmq::poll(&items[0], 2, (request ? 200 : -1));
-
-         //  If we got a reply, process it
-         if (items[0].revents & ZMQ_POLLIN) {
-            auto known = receive<string>(rdb);
-            debug() << "New reply from run DB server: " << known << endmsg;
-            reply = true;
-            boost::optional<double> dt{};
-            if (known == Monitoring::s_Unknown) {
-               warning() << "RunDBServ does have info for run " << run << endmsg;
-            } else {
-               auto info = receive<Monitoring::RunInfo>(rdb);
-               assert(info.run == run);
-               if (info.deadtime >= 0.) {
-                  dt = info.deadtime;
+      while (checkIt != end(toCheck) || toCheck.empty()) {
+         size_t tries = 0;
+         while (true) {
+
+            zmq::pollitem_t items[] = {
+               {rdb, 0, ZMQ_POLLIN, 0},
+               {rep, 0, ZMQ_POLLIN, 0},
+            };
+
+            boost::optional<Monitoring::RunNumber> run;
+            boost::optional<string> app;
+
+            if (checkIt != end(toCheck)) {
+               run = checkIt->first;
+               app = checkIt->second;
+               if (!request) {
                   if (UNLIKELY(msgLevel(MSG::DEBUG)))
-                     debug() << "Deadtime from run DB for run: " << info.run << " "
-                             << info.deadtime << endmsg;
-               } else {
-                  warning() << "Got reply from RunDBServ for run " << info.run
-                            << ", but deadtime is not known." << endmsg;
+                     debug() << "Requesting info for run: " << *run << " "
+                             << *app << " tries " << tries << endmsg;
+                  send(rdb, Monitoring::s_RunInfo, zmq::SNDMORE);
+                  send(rdb, *run, zmq::SNDMORE);
+                  send(rdb, *app);
+                  request = true;
                }
             }
-            deadtimes[run] = dt;
-         }
 
-         if (items[1].revents & ZMQ_POLLIN) {
-            other = true;
-            bool more = true;
-            auto type = receive<string>(rep, &more);
-            if (type == Monitoring::s_Command) {
-               auto cmd = receive<string>(rep);
-               if (cmd == "TERMINATE") {
-                  send(rep, true);
-                  done = true;
-                  break;
+            // Poll socket for a reply, with timeout if we are waiting for some runs
+            // If a run request passes by in the middle, we simply poll a bit longer.
+            // As those are not very frequent and come in batches, that's fine.
+            auto nRep = zmq::poll(&items[0], 2, (toCheck.empty() ? -1 : 500));
+
+            if (items[1].revents & ZMQ_POLLIN) {
+               auto type = receive<string>(rep);
+               if (type == Monitoring::s_Command) {
+                  auto cmd = receive<string>(rep);
+                  if (cmd == "TERMINATE") {
+                     send(rep, true);
+                     done = true;
+                     break;
+                  }
+               } else if (type == Monitoring::s_RunInfo) {
+                  auto reqRun = receive<Monitoring::RunNumber>(rep);
+                  auto application = receive<string>(rep);
+                  auto it = deadtimes.find(reqRun);
+                  if (it != end(deadtimes) && bool(it->second)) {
+                     send(rep, true, zmq::SNDMORE);
+                     send(rep, *it->second);
+                  } else {
+                     send(rep, false);
+                     runs.emplace(reqRun, application);
+                     toCheck.emplace(reqRun, application);
+                  }
                }
-            } else if (type == Monitoring::s_RunInfo) {
-               auto reqRun = receive<Monitoring::RunNumber>(rep);
-               auto app = receive<string>(rep);
-               auto it = deadtimes.find(reqRun);
-               if (it != end(deadtimes) && bool(it->second)) {
-                  send(rep, true, zmq::SNDMORE);
-                  send(rep, *it->second);
+            }
+
+            //  If we got a reply, process it
+            if (items[0].revents & ZMQ_POLLIN) {
+               auto known = receive<string>(rdb);
+               debug() << "New reply from run DB server: " << known << endmsg;
+               boost::optional<double> dt{};
+               if (known == Monitoring::s_Unknown) {
+                  warning() << "RunDBServ does not have info for "
+                            << (run ? "run " + to_string(*run) : string{"a run"}) << endmsg;
                } else {
-                  send(rep, false);
-                  runs.emplace(reqRun, app);
-                  if (!request) break;
-               }
-            } else if (more) {
-               while(more) {
-                  receive<zmq::message_t>(rep, &more);
+                  auto info = receive<Monitoring::RunInfo>(rdb);
+                  if (info.deadtime >= 0.) {
+                     dt = info.deadtime;
+                     if (UNLIKELY(msgLevel(MSG::DEBUG)))
+                        debug() << "Deadtime from run DB for run: " << info.run << " "
+                                << info.deadtime << endmsg;
+                  } else {
+                     warning() << "Got reply from RunDBServ for run " << info.run
+                               << ", but deadtime is not known." << endmsg;
+                  }
+                  deadtimes[info.run] = dt;
+                  if (checkIt != end(toCheck) && (*run == info.run)) {
+                     checkIt = toCheck.erase(checkIt);
+                  }
                }
+               request = false;
             }
-         }
 
-         if (!reply) {
-            tries += !other;
-         }
-
-         if (reply && deadtimes.count(run)) {
-            break;
+            // RunDB connection is now confused, recreate it.
+            if (nRep == 0 && request) {
+               if (UNLIKELY(msgLevel(MSG::DEBUG) && run)) {
+                  debug() << "RunDB info for run: " << *run << " " << *app << " timed out." << endmsg;
+               }
+               rdb = runDBReq();
+               request = false;
+               if (++tries == m_runDBRetries) {
+                  if (checkIt != end(toCheck)) ++checkIt;
+                  if (checkIt == end(toCheck)) checkIt = begin(toCheck);
+                  break;
+               }
+            } else if (!request) {
+               break;
+            }
          }
+         if (done) break;
       }
-
       if (done) break;
-
-      // RunDB connection is now confused, restart it.
-      if (request && !reply) {
-         rdb = runDBReq();
-      }
    }
 }
 
@@ -425,7 +438,13 @@ void Hlt2MonInfoSvc::function()
    // Create frontend, backend and control sockets
    zmq::socket_t data = socket(ZMQ_SUB);
    zmq::setsockopt(data, zmq::LINGER, 0);
-   data.bind(m_frontCon.c_str());
+   zmq::setsockopt(data, zmq::RCVHWM, 10000);
+   try {
+      data.bind(m_frontCon.c_str());
+   } catch (const zmq::error_t&) {
+      error() << "Failed to bind connection " << m_frontCon << endmsg;
+      throw;  // rethrow the original exception object rather than a copy of it
+   }
    zmq::setsockopt(data, zmq::SUBSCRIBE, "");
    info() << "Bound data input socket to: " << m_frontCon << endmsg;
 
@@ -475,11 +494,32 @@ void Hlt2MonInfoSvc::function()
       {inf, 0, ZMQ_POLLIN, 0},
       {syncer, 0, ZMQ_POLLIN, 0}
    };
-   bool paused = false;
+
+   // Storage
+   Monitoring::HistoMap histograms;
+   Monitoring::RunInfoMap runInfo;
+   Monitoring::CounterMap counters;
+
+   // Load histogram info from file
+   if (!m_inputFile.empty()) {
+      auto loaded = loadHistoInfo(histograms, m_inputFile);
+      if (loaded) {
+         debug() << "Read " << histograms.size() << " histograms from "
+                 << m_inputFile << endmsg;
+      } else {
+         warning() << "Failed to load histo info from " << m_inputFile << endmsg;
+      }
+   }
+
+   bool syncMore = false;
+
+   set<string> connections;
+   set<string>::const_iterator connection;
+   size_t syncTries = 5;
 
    while (true) {
       //  Process messages from all sockets
-      zmq::poll (&items[0], 4, -1);
+      auto n = zmq::poll (&items[0], 4, syncMore ? 0 : -1);
 
       if (items[0].revents & ZMQ_POLLIN) {
          // Control messages
@@ -494,16 +534,10 @@ void Hlt2MonInfoSvc::function()
             break;
          } else if (cmd == Monitoring::s_Check) {
             m_controlConnected = true;
-         } else if (cmd == "PAUSE") {
-            debug() << name() << " paused." << endmsg;
-            paused = true;
-         } else if (cmd == "RESUME") {
-            debug() << name() << " resumed." << endmsg;
-            paused = false;
          }
       }
 
-      if (!paused && (items[1].revents & ZMQ_POLLIN)) {
+      if (items[1].revents & ZMQ_POLLIN) {
          vector<zmq::message_t> msgs;
          bool more = true;
          while (more) {
@@ -517,11 +551,11 @@ void Hlt2MonInfoSvc::function()
 
          auto newMsg = false;
          if (type == Monitoring::s_HistoInfo) {
-            newMsg = decodeHistoInfo(msgs);
+            newMsg = decodeHistoInfo(histograms, msgs);
          } else if (type == Monitoring::s_RunInfo) {
-            newMsg = decodeRunInfo(msgs);
+            newMsg = decodeRunInfo(runInfo, msgs);
          } else if (type == Monitoring::s_CounterInfo) {
-            newMsg = decodeCounterInfo(msgs);
+            newMsg = decodeCounterInfo(counters, msgs);
          } else if (type == Monitoring::s_Check) {
             if (msgs.size() != 2) {
                warning() << "Faulty check request of " << msgs.size()
@@ -547,32 +581,78 @@ void Hlt2MonInfoSvc::function()
          }
       }
 
-      if (!paused && (items[2].revents & ZMQ_POLLIN)) {
+      if (items[2].revents & ZMQ_POLLIN) {
          // Requests for info
          // what type
          auto type = receive<std::string>(inf);
          if (type == Monitoring::s_Ping) {
             send(inf, Monitoring::s_Pong);
          } else if (type == Monitoring::s_Sync) {
-            syncRequest(inf);
+            syncRequest(histograms, runInfo, counters, inf);
          } else if (type == Monitoring::s_CounterInfo) {
-            counterInfoRequest(inf);
+            counterInfoRequest(counters, inf);
          } else if (type == Monitoring::s_HistoInfo) {
-            histoInfoRequest(inf);
+            histoInfoRequest(histograms, inf);
          } else if (type == Monitoring::s_RunInfo) {
-            runInfoRequest(inf);
+            runInfoRequest(runInfo, inf);
          } else {
             warning() << "Unknown type of info request received: " << type << endmsg;
          }
       }
 
-      if (!paused && (items[3].revents & ZMQ_POLLIN)) {
-         auto cmd = receive<std::string>(syncer);
-         if (cmd == Monitoring::s_Sync) {
-            sync();
+      // We should sync:
+      // - if we get a sync command
+      // - if there is more to sync, but no other messages
+      if ((items[3].revents & ZMQ_POLLIN) || (syncMore && n == 0)) {
+
+         bool doSync = syncMore;
+
+         // Receive new connections if they were sent
+         if (items[3].revents & ZMQ_POLLIN) {
+            auto cmd = receive<std::string>(syncer);
+            if (cmd == Monitoring::s_Sync) {
+               connections = receive<set<string>>(syncer);
+               connection = begin(connections);
+               syncTries = m_syncTries;
+               doSync = true;
+               if (UNLIKELY(msgLevel(MSG::DEBUG))) {
+                  debug() << "Synchronising with:";
+                  for (auto c : connections) {
+                     debug() << " " << c;
+                  }
+                  debug() << endmsg;
+               }
+            }
+         }
+
+         // If we have a connection and should sync, do it.
+         if (doSync && connection != end(connections)) {
+            vector<string> what = {Monitoring::s_HistoInfo,
+                                   Monitoring::s_RunInfo,
+                                   Monitoring::s_CounterInfo};
+
+            if (UNLIKELY(msgLevel(MSG::VERBOSE))) {
+               verbose() << "Synchronising with: " << *connection << endmsg;
+            }
+            auto r = sync(*connection, what, unordered_set<Monitoring::RunNumber>{},
+                          histograms, HistoKeys{},
+                          runInfo,
+                          counters);
+            if (r) {
+               // If we successfully synced some histograms, indicate
+               // whether there is more to be synced.
+               syncMore = *r;
+            } else {
+               // Otherwise, try the next connection, or start from
+               // the beginning if there are tries left.
+               ++connection;
+               if (connection == end(connections) && syncTries > 0) {
+                  connection = begin(connections);
+                  syncTries = m_syncTries;
+               }
+            }
          }
       }
-
    }
 
    m_stopSync = true;
@@ -584,7 +664,8 @@ void Hlt2MonInfoSvc::function()
 }
 
 //===============================================================================
-bool Hlt2MonInfoSvc::decodeHistoInfo(const vector<zmq::message_t>& msgs) const
+bool Hlt2MonInfoSvc::decodeHistoInfo(Monitoring::HistoMap& histograms,
+                                     const vector<zmq::message_t>& msgs) const
 {
    if (msgs.size() != 5) {
       return false;
@@ -593,24 +674,42 @@ bool Hlt2MonInfoSvc::decodeHistoInfo(const vector<zmq::message_t>& msgs) const
    const auto run = decode<Monitoring::RunNumber>(msgs[1]);
    const auto id = decode<Monitoring::HistId>(msgs[2]);
    const pair<Monitoring::RunNumber, Monitoring::HistId> key{run, id};
+   auto type = decode<std::string>(msgs[3]);
 
-   Printer printer{};
+   MonInfo::Printer printer{};
 
    string title;
-   if (!m_histograms.count(key)) {
-      auto type = decode<std::string>(msgs[3]);
+   if (!histograms.count(key)) {
       HistoMap::const_iterator it;
-      bool placed;
+      bool placed = false;
+
+      // New histogram, do we need to share content?
+      boost::optional<HistoVariant> variant;
       if (type == Monitoring::s_Rate) {
-         std::tie(it, placed) = m_histograms.emplace(key, make_pair(type, decode<string>(msgs[4])));
+         variant = decode<string>(msgs[4]);
       } else if (type == Monitoring::s_Histo1D) {
-         std::tie(it, placed) = m_histograms.emplace(key, make_pair(type, decode<Histo1DDef>(msgs[4])));
+         variant = decode<Histo1DDef>(msgs[4]);
       } else if (type == Monitoring::s_Histo2D) {
-         std::tie(it, placed) = m_histograms.emplace(key, make_pair(type, decode<Histo2DDef>(msgs[4])));
+         variant = decode<Histo2DDef>(msgs[4]);
+      } else {
+         warning() << "Unkown type of histogram info: " << type
+                   << " for histogram with ID: " << run << " " << id << endmsg;
+      }
+
+      if (variant) {
+         std::tie(it, placed) = addHistogram(histograms, key, type, std::move(*variant));
+      }
+
+      if (UNLIKELY(msgLevel(MSG::VERBOSE))) {
+         if (placed) {
+            verbose() << "New histogram: " << key.first << " " << std::setw(20) << std::right
+                      << key.second << " " << std::setw(7) << std::left << type << " "
+                      << boost::apply_visitor(printer, it->content()) << endmsg;
+         } else {
+            verbose() << "Known histogram: " << key.first << " " << std::setw(20) << std::right
+                      << key.second << endmsg;
+         }
       }
-      debug() << "New histogram: " << key.first << " " << std::setw(20) << std::right
-              << key.second << " " << std::setw(7) << std::left << type << " "
-              << boost::apply_visitor(printer, it->second.second) << endmsg;
       return true;
    } else {
       return false;
@@ -618,7 +717,9 @@ bool Hlt2MonInfoSvc::decodeHistoInfo(const vector<zmq::message_t>& msgs) const
 }
 
 //===============================================================================
-bool Hlt2MonInfoSvc::decodeCounterInfo(const vector<zmq::message_t>& msgs, const bool quiet) const
+bool Hlt2MonInfoSvc::decodeCounterInfo(Monitoring::CounterMap& counters,
+                                       const vector<zmq::message_t>& msgs,
+                                       const bool quiet) const
 {
    if (msgs.size() != 3) {
       return false;
@@ -626,13 +727,13 @@ bool Hlt2MonInfoSvc::decodeCounterInfo(const vector<zmq::message_t>& msgs, const
 
    const auto id = decode<Monitoring::HistId>(msgs[1]);
 
-   Printer printer{};
+   MonInfo::Printer printer{};
 
    string title;
-   if (!m_counters.count(id)) {
+   if (!counters.count(id)) {
       CounterMap::const_iterator it;
       bool placed;
-      std::tie(it, placed) = m_counters.emplace(id, decode<CounterDef>(msgs[2]));
+      std::tie(it, placed) = counters.emplace(id, decode<CounterDef>(msgs[2]));
       if (UNLIKELY(msgLevel(MSG::DEBUG) && !quiet))
          debug() << std::setw(22) << std::left << "New counter: " << std::setw(20)
                  << std::right << id << " COUNTER " << printer(it->second) << endmsg;
@@ -643,31 +744,35 @@ bool Hlt2MonInfoSvc::decodeCounterInfo(const vector<zmq::message_t>& msgs, const
 }
 
 //===============================================================================
-bool Hlt2MonInfoSvc::decodeRunInfo(const vector<zmq::message_t>& msgs) const
+bool Hlt2MonInfoSvc::decodeRunInfo(Monitoring::RunInfoMap& runInfo,
+                                   const vector<zmq::message_t>& msgs) const
 {
    if (msgs.size() != 3) {
       return false;
    }
    auto app = decode<pair<string, string>>(msgs[1]);
-   auto runInfo = decode<Monitoring::RunInfo>(msgs[2]);
-
-   debug() << "Decoded run info: " << runInfo.run << " " << app.first << " "
-           << runInfo.start << " " << runInfo.tck << endmsg;
+   auto info = decode<Monitoring::RunInfo>(msgs[2]);
 
    // Add to internal store
-   RunInfoKey key{runInfo.run, app.first};
-   if (!m_runInfo.count(key)) {
+   RunInfoKey key{info.run, app.first};
+   if (!runInfo.count(key)) {
       bool complete = !m_top;
       if (m_top) {
-         auto dt = deadtime(runInfo.run, app.first);
+         auto dt = deadtime(info.run, app.first);
          if (dt) {
-            runInfo.deadtime = *dt;
+            info.deadtime = *dt;
             complete = true;
          }
       }
       if (UNLIKELY(msgLevel(MSG::DEBUG)))
-         debug() << "New run info for run: " << runInfo.run << " " << app.first << endmsg;
-      m_runInfo.emplace(std::move(key), make_pair(complete, std::move(runInfo)));
+         debug() << "New run info for run: " << info.run << " " << app.first
+                 << " complete " << complete << " deadtime " << info.deadtime
+                 << endmsg;
+      if (UNLIKELY(msgLevel(MSG::DEBUG))) {
+         debug() << "Decoded run info: " << info.run << " " << app.first << " "
+                 << info.start << " " << info.tck << endmsg;
+      }
+      runInfo.emplace(std::move(key), make_pair(complete, std::move(info)));  // key and info are moved-from after this
       return true;
    } else {
       return false;
@@ -675,128 +780,165 @@ bool Hlt2MonInfoSvc::decodeRunInfo(const vector<zmq::message_t>& msgs) const
 }
 
 //===============================================================================
-void Hlt2MonInfoSvc::sync() const
+boost::optional<bool> Hlt2MonInfoSvc::sync(std::string connection,
+                                           const std::vector<string>& what,
+                                           const std::unordered_set<Monitoring::RunNumber>& reqRuns,
+                                           Monitoring::HistoMap& histograms,
+                                           const Monitoring::HistoKeys& exclude,
+                                           Monitoring::RunInfoMap& runInfo,
+                                           Monitoring::CounterMap& counters) const
 {
-
-   vector<string> what = {Monitoring::s_HistoInfo,
-                          Monitoring::s_RunInfo,
-                          Monitoring::s_CounterInfo};
-
    HistoKeys histoKeys;
-   std::for_each(begin(m_histograms), end(m_histograms),
-                 [&histoKeys](const HistoMap::value_type& entry) {
-                    histoKeys.emplace(entry.first);
+   std::for_each(begin(histograms), end(histograms),
+                 [&histoKeys](const HistoEntry& entry) {
+                    histoKeys.emplace(entry.key);
+                 });
+
+   std::for_each(begin(exclude), end(exclude),
+                 [&histoKeys](const HistoKey& key) {
+                    histoKeys.emplace(key);
                  });
 
    // gather run info
    RunInfoKeys runKeys;
-   std::for_each(begin(m_runInfo), end(m_runInfo),
+   std::for_each(begin(runInfo), end(runInfo),
                  [&runKeys](const RunInfoMap::value_type& entry) {
                     runKeys.emplace(entry.first);
                  });
 
    CounterKeys counterKeys;
-   std::for_each(begin(m_counters), end(m_counters),
+   std::for_each(begin(counters), end(counters),
                  [&counterKeys](const CounterMap::value_type& entry) {
                     counterKeys.emplace(entry.first);
                  });
 
-   if (UNLIKELY(msgLevel(MSG::DEBUG))) {
-      debug() << "Synchronising with:";
-      for (auto c : m_syncConnections) {
-         debug() << " " << c;
+   // Connect output request socket
+   zmq::socket_t out = socket(zmq::REQ);
+   zmq::setsockopt(out, zmq::LINGER, 0);
+   zmq::setsockopt(out, zmq::RCVTIMEO, 100);
+   out.connect(connection.c_str());
+
+   boost::optional<bool> more;
+
+   // Request synchronisation
+   send(out, Monitoring::s_Sync, zmq::SNDMORE);
+
+   // Send what we want to synchronise in the right order
+   send(out, what, zmq::SNDMORE);
+
+   for (auto w : what) {
+      if (w == Monitoring::s_HistoInfo) {
+         // Send the requested runs (an empty set means all runs) and
+         // the histo keys we have.
+         send(out, reqRuns, zmq::SNDMORE);
+         send(out, histoKeys, w == what.back() ? 0 : zmq::SNDMORE);
+      } else if (w == Monitoring::s_RunInfo) {
+         // The run info we have
+         send(out, runKeys, w == what.back() ? 0 : zmq::SNDMORE);
+      } else if (w == Monitoring::s_CounterInfo) {
+         send(out, counterKeys, w == what.back() ? 0 : zmq::SNDMORE);
       }
-      debug() << endmsg;
    }
-   for (auto connection : m_syncConnections) {
-      if (UNLIKELY(msgLevel(MSG::DEBUG)))
-         debug() << "Synchronising with: " << connection << endmsg;
-
-      // Connect output request socket
-      zmq::socket_t out = socket(zmq::REQ);
-      zmq::setsockopt(out, zmq::LINGER, 0);
-      zmq::setsockopt(out, zmq::RCVTIMEO, 100);
-      out.connect(connection.c_str());
-
-      // Request synchronisation
-      send(out, Monitoring::s_Sync, zmq::SNDMORE);
 
-      // Send what we want to synchronise in the right order
-      send(out, what, zmq::SNDMORE);
-
-      // The histo keys we have
-      send(out, histoKeys, zmq::SNDMORE);
-
-      // The run info we have
-      send(out, runKeys, zmq::SNDMORE);
-
-      // The counter keys we have
-      send(out, counterKeys);
+   boost::optional<string> rep;
+   zmq::pollitem_t items[] = {
+      {out, 0, ZMQ_POLLIN, 0}
+   };
 
-      boost::optional<string> rep;
-      zmq::pollitem_t items[] = {
-         {out, 0, ZMQ_POLLIN, 0}
-      };
+   zmq::poll(&items[0], 1, 1000);
+   if (items[0].revents & ZMQ_POLLIN) {
+      rep = receive<std::string>(out);
+   }
 
-      zmq::poll(&items[0], 1, 1000);
-      if (items[0].revents & ZMQ_POLLIN) {
-         rep = receive<std::string>(out);
-      }
+   if (!rep) {
+      debug() << "Sync request reply timed out." << endmsg;
+      return more;
+   } else if (*rep != "INCOMING") {
+      debug() << "Bad reply to sync request: " << *rep << endmsg;
+      return more;
+   }
 
-      if (!rep) {
-         debug() << "Sync request reply timed out." << endmsg;
-      } else if (*rep == "INCOMING") {
+   for (auto w : what) {
+      if (w == Monitoring::s_HistoInfo) {
          std::unordered_map<Monitoring::RunNumber, size_t> hpr;
 
          // Histograms
          auto histos = receive<HistoPub>(out);
+         auto m = receive<bool>(out);
+         more = (more ? *more | m : m);
          for (const auto& entry : histos) {
-            hpr[std::get<0>(entry)]++;
+            const auto& type = std::get<0>(entry);
+            const auto& info = std::get<1>(entry);
+            const auto& keys = std::get<2>(entry);
 
+            // Entry consists of (type, info_string, keys), where the
+            // info string needs to be converted to a message.
             vector<zmq::message_t> msgs;
             msgs.reserve(5);
-            msgs.emplace_back(encode(Monitoring::s_HistoInfo));
-            msgs.emplace_back(encode(std::get<0>(entry)));
-            msgs.emplace_back(encode(std::get<1>(entry)));
-            msgs.emplace_back(encode(std::get<2>(entry)));
 
-            const auto& infoString = std::get<3>(entry);
-            zmq::message_t msg{infoString.size()};
-            std::copy_n(begin(infoString), infoString.size(), static_cast<char*>(msg.data()));
-            msgs.emplace_back(std::move(msg));
-            decodeHistoInfo(msgs);
-         }
+            for (const auto& key : keys) {
+               if (msgs.empty()) {
+                  msgs.emplace_back(encode(Monitoring::s_HistoInfo));
+                  msgs.emplace_back(encode(key.first));
+                  msgs.emplace_back(encode(key.second));
+                  msgs.emplace_back(encode(type));
 
-         if (UNLIKELY(msgLevel(MSG::DEBUG))) {
-            for (const auto& entry : hpr) {
-               debug() << "Decoded " << std::right << std::setw(6) << to_string(entry.second)
-                       << " histograms for run " << entry.first << endmsg;
+                  zmq::message_t msg{info.size()};
+                  std::copy_n(begin(info), info.size(), static_cast<char*>(msg.data()));
+                  msgs.emplace_back(std::move(msg));
+               } else {
+                  msgs[1] = encode(key.first);
+                  msgs[2] = encode(key.second);
+               }
+               // Update known keys with received info
+               histoKeys.emplace(key.first, key.second);
+
+               hpr[key.first]++;
+               decodeHistoInfo(histograms, msgs);
             }
          }
 
+         for (const auto& entry : hpr) {
+            info() << "Decoded " << std::right << std::setw(6) << to_string(entry.second)
+                   << " histograms for run " << entry.first << endmsg;
+         }
+         if (UNLIKELY(msgLevel(MSG::VERBOSE) && hpr.empty())) {
+            verbose() << "No new histogram info after synchronisation." << endmsg;
+         }
+      } else if (w == Monitoring::s_RunInfo) {
          // Run infos
          auto runInfos = receive<RunInfoPub>(out);
+         auto m = receive<bool>(out);
+         more = (more ? *more | m : m);
          for (const auto& entry : runInfos) {
+            // Update known keys with received info
+            runKeys.emplace(entry.second.run, entry.first);
+
             vector<zmq::message_t> msgs;
             msgs.reserve(3);
             msgs.emplace_back(encode(Monitoring::s_RunInfo));
             msgs.emplace_back(encode(make_pair(entry.first, string{"fake"})));
             msgs.emplace_back(encode(entry.second));
 
-            decodeRunInfo(msgs);
+            decodeRunInfo(runInfo, msgs);
          }
-
+      } else if (w == Monitoring::s_CounterInfo) {
          // Counters
-         auto counters = receive<CounterPub>(out);
+         auto counterPub = receive<CounterPub>(out);
+         auto m = receive<bool>(out);
+         more = (more ? *more | m : m);
 
          size_t nc = 0;
-         for (const auto& entry : counters) {
+         for (const auto& entry : counterPub) {
+            auto id = std::get<0>(entry);
+            counterKeys.emplace(id);
+
             vector<zmq::message_t> msgs;
             msgs.reserve(3);
             msgs.emplace_back(encode(Monitoring::s_CounterInfo));
-            msgs.emplace_back(encode(std::get<0>(entry)));
+            msgs.emplace_back(encode(id));
             msgs.emplace_back(encode(std::get<1>(entry)));
-            decodeCounterInfo(msgs, true);
+            decodeCounterInfo(counters, msgs, true);
             ++nc;
          }
 
@@ -804,28 +946,43 @@ void Hlt2MonInfoSvc::sync() const
             if (nc != 0) {
                debug() << "Decoded " << std::right << std::setw(6) << to_string(nc)
                        << " counters" << endmsg;
-            } else if (!hpr.empty() && !runInfos.empty() && nc == 0) {
-               debug() << "No new info after synchronisation." << endmsg;
+            } else {
+               verbose() << "No new counter info after synchronisation." << endmsg;
             }
          }
+      }
+   }
+   if (UNLIKELY(msgLevel(MSG::DEBUG))) {
+      if (more && *more) {
+         debug() << "Syncing again with " << connection
+                 << " as there is more info." << endmsg;
       } else {
-         info() << "Got unexpected reply to info request: " << *rep << endmsg;
+         verbose() << "Done syncing with " << connection << endmsg;
       }
    }
+   return more;
 }
 
 //===============================================================================
-void Hlt2MonInfoSvc::syncRequest(zmq::socket_t& inf) const
+void Hlt2MonInfoSvc::syncRequest(const Monitoring::HistoMap& histograms,
+                                 const Monitoring::RunInfoMap& runInfo,
+                                 const Monitoring::CounterMap& counters,
+                                 zmq::socket_t& inf) const
 {
 
    HistoKeys otherKeys;
    CounterKeys otherCounters;
    RunInfoKeys otherRuns;
+   unordered_set<Monitoring::RunNumber> reqRuns;
 
-   // What runs and histos does the other know?
+   // What types of info need synchronising?
    auto what = receive<vector<string>>(inf);
+
+   // What runs and histos does the other know?
    for (auto type : what) {
       if (type == Monitoring::s_HistoInfo) {
+         // What runs do we want info about?
+         reqRuns = receive<unordered_set<Monitoring::RunNumber>>(inf);
          otherKeys = receive<HistoKeys>(inf);
       } else if (type == Monitoring::s_CounterInfo) {
          otherCounters = receive<CounterKeys>(inf);
@@ -849,51 +1006,107 @@ void Hlt2MonInfoSvc::syncRequest(zmq::socket_t& inf) const
    // Send requested information in the order it was requested
    send(inf, "INCOMING", zmq::SNDMORE);
 
+   if (UNLIKELY(msgLevel(MSG::VERBOSE))) {
+      verbose() << "Received request to sync:";
+      for (auto type : what) {
+         verbose() << " " << type;
+      }
+      verbose() << endmsg;
+   }
+
    for (auto type : what) {
       if (type == Monitoring::s_HistoInfo) {
-         // Find the info that the other service does not have.
-         auto pubHistos = SelectPub<HistoMap, HistoKeys>{}(m_histograms, otherKeys);
+
+         // Check whether the run of a given histogram has been requested
+         auto useRun = [&reqRuns](Monitoring::RunNumber run) -> bool {
+            return reqRuns.empty() || reqRuns.count(run);
+         };
+
          // Publish histo info
          HistoPub histos;
-         histos.reserve(pubHistos.size());
-         for (auto it : pubHistos) {
-            auto infoString = makeString(boost::apply_visitor(encoder, it->second.second));
-            histos.emplace_back(make_tuple(it->first.first, it->first.second,
-                                           it->second.first, std::move(infoString)));
+
+         // Get our histograms by content (hash)
+         const auto& hbh = histograms.get<ByContent>();
+         histos.reserve(std::min(m_syncBatchSize / 2, hbh.size()));
+
+         // Loop over the hashes and for each hash add all keys that
+         // are not known for that given hash, if the run was
+         // requested.
+         bool more = false;
+         for (auto it = begin(hbh); it != end(hbh); it = hbh.upper_bound(it->hash)) {
+            HistoMap::index<ByContent>::type::const_iterator first, last;
+            std::tie(first, last) = hbh.equal_range(it->hash);
+            std::vector<HistoKey> keys;
+            keys.reserve(std::distance(first, last));
+            std::for_each(first, last, [&keys, &otherKeys, &useRun](const HistoEntry& entry) {
+                  if (useRun(entry.key.first) && !otherKeys.count(entry.key)) {
+                     keys.push_back(entry.key);
+                  }
+               });
+            keys.shrink_to_fit();
+            if (!keys.empty()) {
+               auto infoString = makeString(boost::apply_visitor(encoder, it->content()));
+               histos.emplace_back(make_tuple(it->type, std::move(infoString), std::move(keys)));
+            }
+            // TODO: Do we count hashes, or do we count keys??
+            if (histos.size() > m_syncBatchSize / 2) {
+               more = true;
+               break;
+            }
          }
-         send(inf, histos, type == what.back() ? 0 : zmq::SNDMORE);
-      }
+         // Indicate if there is more to sync, and then send the info
 
-      if (type == Monitoring::s_RunInfo) {
-         auto pubRuns = SelectPub<RunInfoMap, RunInfoKeys>{}(m_runInfo, otherRuns);
+         // Send the info and indicate if there is more to sync.
+         if (UNLIKELY(msgLevel(MSG::VERBOSE))) {
+            verbose() << "Sending " << histos.size() << " histos, more = "
+                      << more << endmsg;
+         }
+
+         send(inf, histos, zmq::SNDMORE);
+         send(inf, more, type == what.back() ? 0 : zmq::SNDMORE);
+      } else if (type == Monitoring::s_RunInfo) {
+         auto r = SelectPub<RunInfoMap, RunInfoKeys>{}(runInfo, otherRuns, m_syncBatchSize);
 
          // Publish run info
          RunInfoPub runInfos;
-         runInfos.reserve(pubRuns.size());
-         for (auto it : pubRuns) {
+         runInfos.reserve(r.first.size());
+         for (auto it : r.first) {
             runInfos.emplace_back(make_pair(it->first.second, it->second.second));
          }
-         send(inf, runInfos, type == what.back() ? 0 : zmq::SNDMORE);
-      }
 
-      if (type == Monitoring::s_CounterInfo) {
-         auto pubCounters = SelectPub<CounterMap, CounterKeys>{}(m_counters, otherCounters);
+         // Send the info and indicate if there is more to sync.
+         if (UNLIKELY(msgLevel(MSG::VERBOSE))) {
+            verbose() << "Sending " << runInfos.size() << " run infos, more = "
+                      << r.second << endmsg;
+         }
+
+         send(inf, runInfos, zmq::SNDMORE);
+         send(inf, r.second, type == what.back() ? 0 : zmq::SNDMORE);
+      } else if (type == Monitoring::s_CounterInfo) {
+         auto r = SelectPub<CounterMap, CounterKeys>{}(counters, otherCounters, m_syncBatchSize);
 
          // Publish counter info
          CounterPub counters;
-         counters.reserve(pubCounters.size());
-         for (auto it : pubCounters) {
+         counters.reserve(r.first.size());
+         for (auto it : r.first) {
             counters.emplace_back(make_tuple(it->first, it->second));
          }
 
+         if (UNLIKELY(msgLevel(MSG::VERBOSE))) {
+            verbose() << "Sending " << counters.size() << " counters, more = "
+                      << r.second << endmsg;
+         }
+
          // Send information
-         send(inf, counters, type == what.back() ? 0 : zmq::SNDMORE);
+         send(inf, counters, zmq::SNDMORE);
+         send(inf, r.second, type == what.back() ? 0 : zmq::SNDMORE);
       }
    }
 }
 
 //===============================================================================
-bool Hlt2MonInfoSvc::histoInfoRequest(zmq::socket_t& inf) const
+bool Hlt2MonInfoSvc::histoInfoRequest(const Monitoring::HistoMap& histograms,
+                                      zmq::socket_t& inf) const
 {
    // What run and ID requested?
    auto key = receiveRunAndId(inf);
@@ -901,8 +1114,10 @@ bool Hlt2MonInfoSvc::histoInfoRequest(zmq::socket_t& inf) const
    Monitoring::HistId id;
    std::tie(run, id) = key;
 
-   verbose() << std::setw(21) << "New histo info request: "
-             << std::right << run << " " << std::setw(20) << id << endmsg;
+   if (UNLIKELY(msgLevel(MSG::VERBOSE))) {
+      verbose() << std::setw(21) << "New histo info request: "
+                << std::right << run << " " << std::setw(20) << id << endmsg;
+   }
 
    // Prepare reply
    std::string known;
@@ -911,11 +1126,11 @@ bool Hlt2MonInfoSvc::histoInfoRequest(zmq::socket_t& inf) const
 
    Encoder encoder{zmq()};
 
-   auto it = m_histograms.find(key);
-   if (it != end(m_histograms)) {
+   auto it = histograms.find(key);
+   if (it != end(histograms)) {
       send(inf, Monitoring::s_Known, zmq::SNDMORE);
-      send(inf, it->second.first, zmq::SNDMORE);
-      send(inf, boost::apply_visitor(encoder, it->second.second));
+      send(inf, it->type, zmq::SNDMORE);
+      send(inf, boost::apply_visitor(encoder, it->content()));
    } else {
       send(inf, Monitoring::s_Unknown);
    }
@@ -924,20 +1139,23 @@ bool Hlt2MonInfoSvc::histoInfoRequest(zmq::socket_t& inf) const
 }
 
 //===============================================================================
-bool Hlt2MonInfoSvc::counterInfoRequest(zmq::socket_t& inf) const
+bool Hlt2MonInfoSvc::counterInfoRequest(const Monitoring::CounterMap& counters,
+                                        zmq::socket_t& inf) const
 {
    // What run and ID requested?
    auto id = receive<Monitoring::HistId>(inf);
 
-   verbose() << std::setw(21) << "New counter info request: "
-             << " " << std::setw(27) << id << endmsg;
+   if (UNLIKELY(msgLevel(MSG::VERBOSE))) {
+      verbose() << std::setw(21) << "New counter info request: "
+                << " " << std::setw(27) << id << endmsg;
+   }
 
    // Prepare reply
    std::string known;
    std::string reply;
 
-   auto it = m_counters.find(id);
-   if (it != end(m_counters)) {
+   auto it = counters.find(id);
+   if (it != end(counters)) {
       send(inf, Monitoring::s_Known, zmq::SNDMORE);
       send(inf, it->second);
    } else {
@@ -948,19 +1166,24 @@ bool Hlt2MonInfoSvc::counterInfoRequest(zmq::socket_t& inf) const
 }
 
 //===============================================================================
-bool Hlt2MonInfoSvc::runInfoRequest(zmq::socket_t& inf) const
+bool Hlt2MonInfoSvc::runInfoRequest(Monitoring::RunInfoMap& runInfo,
+                                    zmq::socket_t& inf) const
 {
    // Incoming IDs
    auto run = receive<Monitoring::RunNumber>(inf);
    auto app = receive<string>(inf);
 
-   verbose() << std::setw(21) << "New run info requested for " << run << " "
-             << app << endmsg;
+   if (UNLIKELY(msgLevel(MSG::VERBOSE))) {
+      verbose() << std::setw(21) << "New run info requested for " << run << " "
+                << app << endmsg;
+   }
 
-   auto it = m_runInfo.find(make_pair(run, app));
-   if (it == end(m_runInfo)) {
+   auto it = runInfo.find(make_pair(run, app));
+   if (it == end(runInfo)) {
       send(inf, Monitoring::s_Unknown);
-      verbose() << "Run info unknown " << endmsg;
+      if (UNLIKELY(msgLevel(MSG::VERBOSE))) {
+         verbose() << "Run info unknown " << endmsg;
+      }
    } else {
       if (m_top && !it->second.first) {
          auto dt = deadtime(run, app);
@@ -969,7 +1192,10 @@ bool Hlt2MonInfoSvc::runInfoRequest(zmq::socket_t& inf) const
             it->second.first = true;
          }
       }
-      verbose() << "Run info known with deadtime: " << it->second.second.deadtime << endmsg;
+      if (UNLIKELY(msgLevel(MSG::VERBOSE))) {
+         verbose() << "Run info known with deadtime: "
+                   << it->second.second.deadtime << endmsg;
+      }
       send(inf, Monitoring::s_Known, zmq::SNDMORE);
       send(inf, it->second.second);
    }
@@ -984,6 +1210,13 @@ boost::optional<double> Hlt2MonInfoSvc::deadtime(const Monitoring::RunNumber run
       m_runDBThread = make_unique<std::thread>([this]{runDB();});
    }
 
+   auto vb = msgLevel(MSG::VERBOSE);
+
+   if (UNLIKELY(vb)) {
+      verbose() << "Requesting deadtime for run "
+                << run << " from run DB thread." << endmsg;
+   }
+
    auto rdb = runDBSocket();
    boost::optional<double> r{};
    size_t tries = 0;
@@ -1005,12 +1238,16 @@ boost::optional<double> Hlt2MonInfoSvc::deadtime(const Monitoring::RunNumber run
       if (items[0].revents & ZMQ_POLLIN) {
          try {
             auto known = receive<bool>(rdb);
+            if (UNLIKELY(vb)) verbose() << "Reply from run DB thread: "
+                                        << (known ? "known: " : "unknown.");
             if (known) {
                auto dt = receive<double>(rdb);
+               if (UNLIKELY(vb)) verbose() << dt;
                if (dt >= 0.) {
                   r = dt;
                }
             }
+            if (UNLIKELY(vb))verbose() << endmsg;
          break;
          } catch (const ZMQ::TimeOutException&) {
             rdb = runDBSocket();
diff --git a/Online/Hlt2Monitoring/src/component/Hlt2MonInfoSvc.h b/Online/Hlt2Monitoring/src/component/Hlt2MonInfoSvc.h
index cae8bd57b4ece7f62c8713a7bfe7541b8dd4c5af..0a59e57504fd607d3edc8237136ccc20e109af9e 100644
--- a/Online/Hlt2Monitoring/src/component/Hlt2MonInfoSvc.h
+++ b/Online/Hlt2Monitoring/src/component/Hlt2MonInfoSvc.h
@@ -22,6 +22,7 @@
 
 // Hlt2Monitoring
 #include <Hlt2Monitoring/Types.h>
+#include <Hlt2Monitoring/HistoUtils.h>
 #include <Hlt2Monitoring/RunInfo.h>
 #include <Hlt2Monitoring/CounterDef.h>
 #include <Hlt2Monitoring/HashTuple.h>
@@ -41,7 +42,6 @@ class Hlt2MonInfoSvc : public Hlt2MonBaseSvc {
 public:
 
    Hlt2MonInfoSvc(const std::string& name, ISvcLocator* sl);
-   virtual ~Hlt2MonInfoSvc();
 
    // Service pure virtual member functions
    virtual StatusCode initialize() override;
@@ -55,16 +55,32 @@ private:
    void synchroniser();
 
    // Request to synchronise with others
-   void sync() const;
-
-   bool decodeHistoInfo(const std::vector<zmq::message_t>& info) const;
-   bool decodeCounterInfo(const std::vector<zmq::message_t>& info, const bool quiet = false) const;
-   bool decodeRunInfo(const std::vector<zmq::message_t>& info) const;
-
-   void syncRequest(zmq::socket_t& inf) const;
-   bool histoInfoRequest(zmq::socket_t& inf) const;
-   bool counterInfoRequest(zmq::socket_t& inf) const;
-   bool runInfoRequest(zmq::socket_t& inf) const;
+   boost::optional<bool> sync(std::string connection,
+                              const std::vector<std::string>& what,
+                              const std::unordered_set<Monitoring::RunNumber>& reqRuns,
+                              Monitoring::HistoMap& histograms,
+                              const Monitoring::HistoKeys& exclude,
+                              Monitoring::RunInfoMap& runInfo,
+                              Monitoring::CounterMap& counters) const;
+
+   bool decodeHistoInfo(Monitoring::HistoMap& histograms,
+                        const std::vector<zmq::message_t>& info) const;
+   bool decodeCounterInfo(Monitoring::CounterMap& counters,
+                          const std::vector<zmq::message_t>& info,
+                          const bool quiet = false) const;
+   bool decodeRunInfo(Monitoring::RunInfoMap& runInfo,
+                      const std::vector<zmq::message_t>& info) const;
+
+   void syncRequest(const Monitoring::HistoMap& histograms,
+                    const Monitoring::RunInfoMap& runInfo,
+                    const Monitoring::CounterMap& counters,
+                    zmq::socket_t& inf) const;
+   bool histoInfoRequest(const Monitoring::HistoMap& histograms,
+                         zmq::socket_t& inf) const;
+   bool counterInfoRequest(const Monitoring::CounterMap& counters,
+                           zmq::socket_t& inf) const;
+   bool runInfoRequest(Monitoring::RunInfoMap& runInfo,
+                       zmq::socket_t& inf) const;
 
    void runDB() const;
    std::string runDBThreadCon() const {
@@ -91,6 +107,10 @@ private:
    SmartIF<ITransmitterSvc> m_transmitter;
    unsigned int m_sendInterval;
    unsigned int m_sourceID;
+   std::string m_connectionPath;
+   size_t m_syncBatchSize;
+   size_t m_syncTries;
+   std::string m_inputFile;
 
    // data members
    std::string m_hostname;
@@ -113,41 +133,34 @@ private:
       const IZeroMQSvc& m_zmq;
    };
 
-   // Visitor to print different types in an info message
-   class Printer : public boost::static_visitor<std::string> {
-   public:
-      std::string operator()( const std::string& eval ) {
-         return eval;
-      }
-
-      std::string operator()( const Monitoring::Histo1DDef& def ) {
-         return def.title;
-      }
+   template<class C, class O, class K = typename C::key_type>
+   struct SelectPub {
 
-      std::string operator()( const Monitoring::Histo2DDef& def ) {
-         return def.title;
+      std::pair<std::vector<typename C::const_iterator>, bool>
+      operator()(const C& container, const O& other, size_t n) {
+         auto pred = [](typename C::const_iterator) { return true; };
+         auto extract = [](typename C::const_iterator it) { return it->first; };
+         return operator()(container, other, extract, pred, n);
       }
 
-      std::string operator()( const Monitoring::CounterDef& def ) {
-         return def.name + " " + def.description;
-      }
-   };
-
-   template<class C, class O>
-   struct SelectPub {
-      std::vector<typename C::const_iterator> operator()(const C& container, const O& other) {
+      std::pair<std::vector<typename C::const_iterator>, bool>
+      operator()(const C& container, const O& other,
+                 std::function<K(typename C::const_iterator)> keyExtract,
+                 std::function<bool(typename C::const_iterator)> predicate, size_t n) {
          std::vector<typename C::const_iterator> pub;
+         bool more = false;
          for (auto it = begin(container), last = end(container); it != last; ++it) {
-            if (!other.count(it->first)) pub.emplace_back(it);
+            if (predicate(it) && !other.count(keyExtract(it))) pub.emplace_back(it);
+            if (pub.size() > n) {
+               more = true;
+               break;
+            }
          }
-         return pub;
+         return make_pair(std::move(pub), more);
       }
    };
 
    // Storage for histogram, counter, and run information
-   mutable Monitoring::HistoMap m_histograms;
-   mutable Monitoring::CounterMap m_counters;
-   mutable Monitoring::RunInfoMap m_runInfo;
    mutable boost::unordered_map<Monitoring::RunNumber, int> m_startTimes;
 
    // Run Info
diff --git a/Online/Hlt2Monitoring/src/component/Hlt2RootPublishSvc.cpp b/Online/Hlt2Monitoring/src/component/Hlt2RootPublishSvc.cpp
index 7494bad9c7fd0267d86bdcfad72cf43270b66fe2..d0a39570daf2fde0ee8f7d9d07be8a2b6d023e0d 100644
--- a/Online/Hlt2Monitoring/src/component/Hlt2RootPublishSvc.cpp
+++ b/Online/Hlt2Monitoring/src/component/Hlt2RootPublishSvc.cpp
@@ -5,6 +5,9 @@
 #include <memory>
 #include <set>
 #include <thread>
+#include <random>
+#include <unordered_set>
+#include <unordered_map>
 
 // boost
 #include <boost/algorithm/string/find.hpp>
@@ -31,6 +34,7 @@
 #include <Hlt2Monitoring/Histo1DDef.h>
 #include <Hlt2Monitoring/Histo2DDef.h>
 #include <Hlt2Monitoring/Serialize.h>
+#include <Hlt2Monitoring/Utilities.h>
 
 // ROOT
 #include <TClass.h>
@@ -51,6 +55,7 @@ namespace {
    using std::make_pair;
    using std::pair;
    using std::unordered_map;
+   using std::unordered_set;
 
    using boost::lexical_cast;
    namespace ba = boost::algorithm;
@@ -95,14 +100,15 @@ DECLARE_SERVICE_FACTORY(Hlt2RootPublishSvc)
 
 //=============================================================================
 Hlt2RootPublishSvc::Hlt2RootPublishSvc(const string& name, ISvcLocator* loc)
- : Hlt2MonBaseSvc(name, loc),
-   m_stopPublishing{false}
+ : Hlt2MonBaseSvc(name, loc)
 {
    declareProperty("InfoConnection", m_infoCon);
    declareProperty("PublishInterval", m_publishInterval = 60);
    declareProperty("RateStart", m_rateStart = 0.);
    declareProperty("RunDuration", m_runDuration = 4000.);
    declareProperty("RateInterval", m_rateInterval = 5.);
+   declareProperty("BatchSize", m_batchSizeProp = 400);
+   declareProperty("HighWaterMark", m_hwm = 10000);
 }
 
 //===============================================================================
@@ -113,6 +119,8 @@ StatusCode Hlt2RootPublishSvc::initialize()
     return sc;
   }
 
+  m_batchSize = m_batchSizeProp;
+
   if (m_frontCon.empty() || m_backCon.empty() || m_infoCon.empty()) {
     warning() << "Connections not correctly configured, "
               << "Hlt2 ROOT publisher disabled" << endmsg;
@@ -133,11 +141,14 @@ void Hlt2RootPublishSvc::function()
    auto control = connectControl();
    if (!m_controlConnected) return;
 
-   zmq::socket_t front = socket(ZMQ_SUB);
-   front.connect(m_frontCon.c_str());
+   zmq::socket_t front = socket(zmq::SUB);
+   zmq::setsockopt(front, zmq::LINGER, 0);
    zmq::setsockopt(front, zmq::SUBSCRIBE, "");
-   info() << "Connected front socket to: " << m_frontCon << endmsg;
-
+   auto hwm = boost::numeric_cast<int>(m_hwm);
+   zmq::setsockopt(front, zmq::RCVHWM, hwm);
+   front.connect(m_frontCon.c_str());
+   info() << "Connected front socket to: " << m_frontCon
+          << " with HWM " << hwm << endmsg;
    // Clean up queue
    zmq::message_t msg;
    while (front.recv(&msg, ZMQ_DONTWAIT)) {
@@ -148,8 +159,11 @@ void Hlt2RootPublishSvc::function()
    info() << "Connected info socket to: " << m_infoCon << endmsg;
 
    zmq::socket_t back = socket(ZMQ_PUB);
+   zmq::setsockopt(back, zmq::LINGER, 0);
+   zmq::setsockopt(back, zmq::SNDHWM, hwm); // send HWM belongs on the PUB socket bound below, not on front
    back.bind(m_backCon.c_str());
-   info() << "Bound back socket to: " << m_backCon << endmsg;
+   info() << "Bound back socket to: " << m_backCon
+          << " with HWM " << hwm << endmsg;
 
    // Publish trigger thread socket
    zmq::socket_t publish = socket(zmq::PAIR);
@@ -157,15 +171,18 @@ void Hlt2RootPublishSvc::function()
    publish.bind(pubCon().c_str());
 
    // Start thread to trigger saving of histograms
-   std::thread pubThread([this] { periodic(pubCon(), Monitoring::s_Publish,
-                                            m_stopPublishing, m_publishInterval); });
+   std::thread pubThread([this] { periodic(pubCon(), Monitoring::s_Publish, 0.5); });
+   size_t pubCounter = m_publishInterval * 2;
 
    //  Initialize poll set
    zmq::pollitem_t items[] = {{control, 0, ZMQ_POLLIN, 0},
                               {front, 0, ZMQ_POLLIN, 0},
                               {publish, 0, ZMQ_POLLIN, 0}};
 
-   bool paused = false;
+   ROOTHistos histos;
+   SentHistos sentHistos;
+   Queue messages;
+   bool sentAll = true;
 
    while (true) {
       //  Process messages from both sockets
@@ -175,23 +192,13 @@ void Hlt2RootPublishSvc::function()
 
       if (items[0].revents & ZMQ_POLLIN) {
          auto cmd = receive<string>(control);
-         if (cmd == "TERMINATE") {
-            for (auto s : {&front, &inf, &control}) {
-               zmq::setsockopt(*s, zmq::LINGER, 0);
-            }
+         if (cmd == Monitoring::s_Terminate) {
+            send(publish, Monitoring::s_Terminate);
             break;
-         } else if (cmd == "PAUSE") {
-            if (UNLIKELY(msgLevel(MSG::DEBUG)))
-               debug() << name() << " paused." << endmsg;
-            paused = true;
-         } else if (cmd == "RESUME") {
-            if (UNLIKELY(msgLevel(MSG::DEBUG)))
-               debug() << name() << " resumed." << endmsg;
-            paused = false;
          }
       }
 
-      if (!paused && (items[1].revents & ZMQ_POLLIN)) {
+      if (items[1].revents & ZMQ_POLLIN) {
          // Deserialize
          auto type = receive<string>(front);
          if (type == Monitoring::s_HistDiff) {
@@ -202,8 +209,8 @@ void Hlt2RootPublishSvc::function()
                string type, dir;
                TH1* rhist = nullptr;
                HistoKey key{hdiff.runNumber, hdiff.histId};
-               auto it = m_histos.find(key);
-               if (it != end(m_histos)) {  // We have it cached!
+               auto it = histos.find(key);
+               if (it != end(histos)) {  // We have it cached!
                   type = std::get<0>(it->second);
                   dir = std::get<1>(it->second);
                   rhist = std::get<2>(it->second).get();
@@ -212,7 +219,7 @@ void Hlt2RootPublishSvc::function()
                   std::tie(type, dir, hist) = getHistogram(inf, hdiff.runNumber, hdiff.histId);
                   if (hist) {
                      rhist = hist.get();
-                     m_histos.emplace(std::move(key), make_tuple(type, dir, std::move(hist)));
+                     histos.emplace(std::move(key), make_tuple(type, dir, std::move(hist)));
                   }
                }
                if (rhist == nullptr) {
@@ -250,47 +257,160 @@ void Hlt2RootPublishSvc::function()
          }
       }
 
-      if (!paused && items[2].revents & ZMQ_POLLIN) {
+      if (items[2].revents & ZMQ_POLLIN) {
          auto cmd = receive<string>(publish);
          if (cmd == Monitoring::s_Publish) {
-            // Sync with info service and store resulting empty
-            // histogramsq
-            auto histos = syncHistograms(inf);
-            for (auto& entry : histos) {
-               m_histos.emplace(std::move(entry));
+            if (pubCounter > 0) {
+               --pubCounter;
             }
 
-            // Publish
-            publishHistograms(back);
+            if ((pubCounter == 0 && messages.empty() && !histos.empty())
+                || (messages.empty() && !sentAll)) {
+               // Find which runs have histograms that are not empty
+               // NOTE: This can probably be done better with a
+               // multi-index container that is indexed by run and
+               // (run, histID). For now just save all runs for which
+               // there is at least 1 not-empty histogram
+               std::unordered_set<Monitoring::RunNumber> runNotEmpty;
+               size_t nonEmpty = 0;
+               for (auto& entry : histos) {
+                  if (std::get<2>(entry.second)->GetEntries() != 0) {
+                     ++nonEmpty;
+                     runNotEmpty.emplace(entry.first.first);
+                  }
+               }
 
-            // Reset histograms
-            for (auto& entry : m_histos) {
-               std::get<2>(entry.second)->Reset("ICESM");
+               // Sync with info service and store resulting empty
+               // histograms
+               for (auto& entry : syncHistograms(inf, histos, sentHistos, runNotEmpty)) {
+                  histos.emplace(std::move(entry));
+               }
+
+               // Fill the queue of messages
+               auto s = messages.size();
+               auto sent = publishHistograms(histos, sentHistos, messages);
+               debug() << "Created " << messages.size() - s << " messages." << endmsg;
+               sentAll = (sent.size() == nonEmpty);
+
+               // Reset/cleanup histograms
+
+               // Reset the histograms that were just sent. If the number of
+               // histograms is above half the HWM, erase sent ones (empty ones
+               // first) until it is down to half of the HWM.
+               for (bool onlyEmpty : {true, false}) {
+                  s = histos.size();
+                  auto ne = 0, nr = 0;
+                  for (ROOTHistos::const_iterator it = histos.cbegin();
+                       it != histos.cend();) {
+                     // Histograms that were not sent are left untouched.
+                     // Pass 1 (onlyEmpty) erases sent+empty ones while above HWM/2.
+                     // Pass 2 erases any sent one while above HWM/2, resets the rest.
+                     auto histo = std::get<2>(it->second).get();
+                     if (!sent.count(it->first)) {
+                        ++it;
+                     } else if ((histos.size() > m_hwm / 2)
+                                && (!onlyEmpty || histo->GetEntries() == 0)) {
+                        it = histos.erase(it);
+                        ++ne;
+                     } else if (!onlyEmpty) {
+                        // Only reset on the second pass to avoid doing it twice
+                        histo->Reset("ICESM");
+                        ++it;
+                        ++nr;
+                     } else {
+                        ++it;
+                     }
+                  }
+
+                  debug() << "Pruned " << s - histos.size() << " sent histograms." << endmsg;
+                  debug() << "Erased " << ne << " histograms, reset " << nr << endmsg;
+
+                  // No early exit: the second pass must always run, it does the Reset.
+               }
+
+
+               pubCounter = m_publishInterval * 2;
+            }
+
+            // Send some more if we have a lot of messages
+            if (messages.size() > m_hwm * 4 && m_batchSize < 2 * m_batchSizeProp) {
+               m_batchSize = 2 * m_batchSizeProp;
+            } else if (messages.size() < m_hwm) {
+               m_batchSize = m_batchSizeProp;
+            }
+
+            if (!messages.empty()){
+               auto n = std::min(messages.size(), m_batchSize);
+
+               if (UNLIKELY(msgLevel(MSG::VERBOSE))) {
+                  verbose() << "Sending " << n << " messages " << endmsg;
+               }
+
+               for (size_t m = 0; m < n; ++m) {
+                  auto& message = messages.front(); // one queued multipart message
+                  for (size_t i = 0; i < message.size(); ++i) {
+                     send(back, message[i], i + 1 < message.size() ? zmq::SNDMORE : 0); // last part closes the multipart
+                  }
+                  messages.pop_front();
+               }
+            } else if (UNLIKELY(msgLevel(MSG::VERBOSE))) {
+               verbose() << "Neither histograms nor messages." << endmsg;
             }
          }
       }
    }
 
-   m_stopPublishing = true;
    pubThread.join();
-
 }
 
 //===============================================================================
-void Hlt2RootPublishSvc::publishHistograms(zmq::socket_t& socket) const
+Hlt2RootPublishSvc::SentHistos
+Hlt2RootPublishSvc::publishHistograms(ROOTHistos& histos,
+                                      SentHistos& sentHistos,
+                                      Queue& messages) const
 {
    // Loop over histograms
    if (UNLIKELY(msgLevel(MSG::DEBUG)))
       debug() << "Publishing histograms." << endmsg;
 
-   for (auto& entry : m_histos) {
+   // Find out which runs have at least one non-empty histogram
+   std::unordered_set<Monitoring::RunNumber> runNotEmpty;
+   for (auto& entry : histos) {
+      if (std::get<2>(entry.second)->GetEntries() != 0) {
+         runNotEmpty.emplace(entry.first.first);
+      }
+   }
+
+   std::vector<ROOTHistos::const_iterator> iterators;
+   iterators.reserve(histos.size());
+   for (auto it = begin(histos), last = end(histos);
+        it != last; ++it) {
+      iterators.push_back(it);
+   }
+
+   // If we have too many histograms to send, randomize their order and send what
+   // we can.
+   if (iterators.size() > m_hwm) {
+      if (UNLIKELY(msgLevel(MSG::DEBUG))) {
+         debug() << "Not publishing all histos (" << histos.size()
+                 << ") as there are more than the hwm (" << m_hwm
+                 << "). Which histograms are sent has been randomized."
+                 << endmsg;
+      }
+      std::mt19937 gen{std::hash<string>{}(Monitoring::hostname())};
+      std::shuffle(begin(iterators), end(iterators), gen);
+   }
+
+   SentHistos sent;
+
+   for (auto it : iterators) {
       Monitoring::RunNumber run;
       Monitoring::HistId id;
-      std::tie(run, id) = entry.first;
+      std::tie(run, id) = it->first;
 
-      string type = std::get<0>(entry.second);
-      string dir = std::get<1>(entry.second);
-      const unique_ptr<TH1>& hist = std::get<2>(entry.second);
+      string type = std::get<0>(it->second);
+      string dir = std::get<1>(it->second);
+      const unique_ptr<TH1>& hist = std::get<2>(it->second);
 
       if (hist == nullptr) {
          // TODO: Skip empty histograms. GetEntries does not
@@ -299,24 +419,37 @@ void Hlt2RootPublishSvc::publishHistograms(zmq::socket_t& socket) const
       } else if (!Monitoring::RootTypeMap.count(hist->ClassName())) {
          warning() << "Unknown type of histogram: " << hist->ClassName() << endmsg;
          continue;
+      } else if (hist->GetEntries() == 0 && sentHistos.count(it->first)) {
+         // Don't send an empty histogram twice
+         continue;
+      } else {
+         sentHistos.emplace(it->first);
       }
 
       if (UNLIKELY(msgLevel(MSG::VERBOSE)))
          verbose() << "Publishing " << run << " " << id << " " << dir << " "
                    << type << " " << hist->GetName() << endmsg;
 
-      // Send run and ID
-      send(socket, run, zmq::SNDMORE);
-      send(socket, id, zmq::SNDMORE);
-      // Send histogram type
-      send(socket, type, zmq::SNDMORE);
-      // Send histogram directory
-      send(socket, dir, zmq::SNDMORE);
-      // Indicate histograms should be added by the SaverSvc
-      send(socket, true, zmq::SNDMORE);
-      // Send histogram
-      send(socket, *hist);
+      array<zmq::message_t, 6> message = {
+         // run and ID
+         zmq().encode(run),
+         zmq().encode(id),
+         // Send histogram type
+         zmq().encode(type),
+         // Send histogram directory
+         zmq().encode(dir),
+         // Indicate histograms should be added by the SaverSvc
+         zmq().encode(true),
+         // Send histogram
+         zmq().encode(*hist)};
+      messages.emplace_back(std::move(message));
+
+      // Indicate we sent this histogram
+      sent.emplace(it->first);
+
+      if (messages.size() >= m_hwm) break;
    }
+   return sent;
 }
 
 //===============================================================================
@@ -403,25 +536,25 @@ Hlt2RootPublishSvc::DirAndHist Hlt2RootPublishSvc::makeRate(const string& path)
 
 //===============================================================================
 Hlt2RootPublishSvc::DirAndHist Hlt2RootPublishSvc::make1D(const Histo1DDef& def) const {
-  string dir, title;
-  std::tie(dir, title) = splitPath(def.title);
+   string dir, title;
+   std::tie(dir, title) = splitPath(def.title);
 
-  std::unique_ptr<TH1D> hist;
-  if (def.variable) {
-     hist = make_unique<TH1D>(title.c_str(), title.c_str(),
-                              def.xbins, def.xedges.data());
-  } else {
-     hist = make_unique<TH1D>(title.c_str(), title.c_str(),
-                              def.xbins, def.xlow, def.xhigh);
-  }
+   std::unique_ptr<TH1D> hist;
+   if (def.variable) {
+      hist = make_unique<TH1D>(title.c_str(), title.c_str(),
+                               def.xbins, def.xedges.data());
+   } else {
+      hist = make_unique<TH1D>(title.c_str(), title.c_str(),
+                               def.xbins, def.xlow, def.xhigh);
+   }
 
-  if (def.labels) setLabels(def.xlabels, hist->GetXaxis());
-  
-  hist->SetDirectory(nullptr);
-  hist->Sumw2();
-  if (UNLIKELY(msgLevel(MSG::VERBOSE)))
-     verbose() << "Created TH1D for " << def.title << endmsg;
-  return {dir, std::move(hist)};
+   if (def.labels) setLabels(def.xlabels, hist->GetXaxis());
+
+   hist->SetDirectory(nullptr);
+   hist->Sumw2();
+   if (UNLIKELY(msgLevel(MSG::VERBOSE)))
+      verbose() << "Created TH1D for " << def.title << endmsg;
+   return {dir, std::move(hist)};
 }
 
 //===============================================================================
@@ -437,7 +570,7 @@ Hlt2RootPublishSvc::DirAndHist Hlt2RootPublishSvc::make2D(const Histo2DDef& def)
   } else if (def.xvariable) {
      hist = make_unique<TH2D>(title.c_str(), title.c_str(),
                               def.xbins, def.xedges.data(),
-                              def.ybins, def.ylow,  def.yhigh);
+                              def.ybins, def.ylow, def.yhigh);
 
   } else if (def.yvariable) {
      hist = make_unique<TH2D>(title.c_str(), title.c_str(),
@@ -447,7 +580,7 @@ Hlt2RootPublishSvc::DirAndHist Hlt2RootPublishSvc::make2D(const Histo2DDef& def)
   } else {
      hist = make_unique<TH2D>(title.c_str(), title.c_str(),
                               def.xbins, def.xlow, def.xhigh,
-                              def.ybins, def.ylow,  def.yhigh);
+                              def.ybins, def.ylow, def.yhigh);
   }
 
   if (def.labels) {
@@ -474,74 +607,105 @@ pair<string, string> Hlt2RootPublishSvc::splitPath(const string& path) const
 
 //===============================================================================
 vector<Hlt2RootPublishSvc::ROOTHistos::value_type>
-Hlt2RootPublishSvc::syncHistograms(zmq::socket_t& inf) const
+Hlt2RootPublishSvc::syncHistograms(zmq::socket_t& inf,
+                                   const ROOTHistos& histos,
+                                   const SentHistos& sentHistos,
+                                   const unordered_set<Monitoring::RunNumber>& runNotEmpty) const
 {
    // Request to sync histo info to make empty histograms
    vector<string> what = {Monitoring::s_HistoInfo};
 
+   // As we do some cleanup, use also the sentHistos to track which histograms
+   // we know about.
    HistoKeys histoKeys;
-   std::for_each(begin(m_histos), end(m_histos),
+   std::for_each(begin(histos), end(histos),
                  [&histoKeys](const ROOTHistos::value_type& entry) {
                     histoKeys.emplace(entry.first);
                  });
 
-   // Send request
-   send(inf, Monitoring::s_Sync, zmq::SNDMORE);
-   send(inf, what, zmq::SNDMORE);
-   send(inf, histoKeys);
-
-   // Wait for reply
-   boost::optional<string> rep;
-   zmq::pollitem_t items[] = {
-      {inf, 0, ZMQ_POLLIN, 0}
-   };
+   std::for_each(begin(sentHistos), end(sentHistos),
+                 [&histoKeys](const SentHistos::value_type& entry) {
+                    histoKeys.emplace(entry);
+                 });
 
-   zmq::poll(&items[0], 1, 1000);
-   if (items[0].revents & ZMQ_POLLIN) {
-      rep = receive<string>(inf);
-   }
 
+   unordered_map<Monitoring::RunNumber, size_t> hpr;
    vector<ROOTHistos::value_type> r;
 
-   if (!rep) {
-      // No reply, recreate info socket
-      inf = infoSocket();
-      warning() << "No reply from Info service to sync request." << endmsg;
-   } else if(*rep == "INCOMING") {
-      unordered_map<Monitoring::RunNumber, size_t> hpr;
-      auto histos = receive<HistoPub>(inf);
-      for (const auto& entry : histos) {
-         hpr[std::get<0>(entry)]++;
-         // Entry consists of (RunNumber, HistId, type, info_string),
-         // where the info string needs to be converted to a message.
-         const auto& type = std::get<2>(entry);
-
-         // The last entry in the tuple is a string that needs to be
-         // decoded according to the type; it's actually used as a
-         // byte array. We make a message out of it again to decode
-         // it.
-         const auto& infoString = std::get<3>(entry);
-         zmq::message_t msg{infoString.size()};
-         std::copy_n(begin(infoString), infoString.size(), static_cast<char*>(msg.data()));
-
-         // Create the histogram
-         auto tdh = makeHistogram(type, msg);
-         if (std::get<2>(tdh)) {
-            r.emplace_back(HistoKey{std::get<0>(entry), std::get<1>(entry)}, std::move(tdh));
-         } else if (UNLIKELY(msgLevel(MSG::DEBUG))) {
-            debug() << "Could not create histogram for run = " << std::get<0>(entry)
-                    << " and id = " << std::get<1>(entry) << endmsg;
-         }
+   // Loop until there is no more information, or known + new histograms hit the HWM
+   bool more = true;
+   while (more && histos.size() + r.size() < m_hwm) {
+      // Send request
+      send(inf, Monitoring::s_Sync, zmq::SNDMORE);
+      send(inf, what, zmq::SNDMORE);
+      send(inf, runNotEmpty, zmq::SNDMORE);
+      send(inf, histoKeys);
+
+      // Wait for reply
+      boost::optional<string> rep;
+      zmq::pollitem_t items[] = {
+         {inf, 0, ZMQ_POLLIN, 0}
+      };
+
+      zmq::poll(&items[0], 1, 1000);
+      if (items[0].revents & ZMQ_POLLIN) {
+         rep = receive<string>(inf);
       }
 
-      if (UNLIKELY(msgLevel(MSG::DEBUG))) {
-         for (const auto& entry : hpr) {
-            debug() << "Created " << std::right << std::setw(6) << to_string(entry.second)
-                    << " empty histograms for run " << entry.first << endmsg;
+      if (!rep) {
+         // No reply, recreate info socket
+         inf = infoSocket();
+         warning() << "No reply from Info service to sync request." << endmsg;
+         break;
+      } else if(*rep == "INCOMING") {
+         auto infos = receive<HistoPub>(inf);
+         more = receive<bool>(inf);
+         for (const auto& entry : infos) {
+            auto type = std::get<0>(entry);
+
+            // The second entry in the tuple is a string that needs to be
+            // decoded according to the type; it's actually used as a
+            // byte array. We make a message out of it again to decode
+            // it.
+            const auto& infoString = std::get<1>(entry);
+            zmq::message_t msg{infoString.size()};
+            std::copy_n(begin(infoString), infoString.size(), static_cast<char*>(msg.data()));
+
+            // The last entry is the vector of keys.
+            const auto& keys = std::get<2>(entry);
+            for (const auto& key : keys) {
+               auto run = std::get<0>(key);
+               auto histID = std::get<1>(key);
+
+               hpr[run]++;
+
+               // Update known keys with received info
+               histoKeys.emplace(run, histID);
+
+               // Create the histogram
+               auto tdh = makeHistogram(type, msg);
+               if (std::get<2>(tdh)) {
+                  r.emplace_back(HistoKey{run, histID}, std::move(tdh));
+               } else if (UNLIKELY(msgLevel(MSG::DEBUG))) {
+                  debug() << "Could not create histogram for run = " << run
+                          << " and id = " << histID << endmsg;
+               }
+            }
+         }
+         if (UNLIKELY(msgLevel(MSG::DEBUG) && more)) {
+            debug() << "Syncing again with " << m_infoCon
+                    << " as there is more info." << endmsg;
          }
+      } else {
+         info() << "Got unexpected reply to info request: " << *rep << endmsg;
+         more = false;
       }
-   } else {
-      info() << "Got unexpected reply to info request: " << *rep << endmsg;
    }
+
+   for (const auto& entry : hpr) {
+      info() << "Created " << std::right << std::setw(6) << to_string(entry.second)
+             << " empty histograms for run " << entry.first << endmsg;
+   }
+
    return r;
 }
diff --git a/Online/Hlt2Monitoring/src/component/Hlt2RootPublishSvc.h b/Online/Hlt2Monitoring/src/component/Hlt2RootPublishSvc.h
index 7eaf8a46022dbb5c24eaf080cf9210c56e5f9d77..16563b3aa17efaf39236f12ccb6426b18d244bc7 100644
--- a/Online/Hlt2Monitoring/src/component/Hlt2RootPublishSvc.h
+++ b/Online/Hlt2Monitoring/src/component/Hlt2RootPublishSvc.h
@@ -37,8 +37,11 @@ class Hlt2RootPublishSvc : public Hlt2MonBaseSvc {
 public:
    using DirAndHist = std::pair<std::string, std::unique_ptr<TH1>>;
    using TypeDirHist = std::tuple<std::string, std::string, std::unique_ptr<TH1>>;
-   using ROOTHistos = boost::unordered_map<Monitoring::HistoKey, TypeDirHist>;
-
+   using ROOTHistos = std::unordered_map<Monitoring::HistoKey, TypeDirHist,
+                                         Monitoring::KeyHash>;
+   using SentHistos = std::unordered_set<Monitoring::HistoKey,
+                                         Monitoring::KeyHash>;
+   using Queue = std::deque<std::array<zmq::message_t, 6>>;
    Hlt2RootPublishSvc(const std::string& name, ISvcLocator* sl);
 
    StatusCode initialize() override;
@@ -46,10 +49,13 @@ public:
 
 private:
 
-   void publishHistograms(zmq::socket_t&) const;
+   SentHistos publishHistograms(ROOTHistos& histos,
+                                SentHistos& sentHistos,
+                                Queue& messages) const;
 
    std::vector<ROOTHistos::value_type>
-   syncHistograms(zmq::socket_t&) const;
+   syncHistograms(zmq::socket_t&, const ROOTHistos& histos, const SentHistos& sentHistos,
+                  const std::unordered_set<Monitoring::RunNumber>& runNotEmpty) const;
 
    TypeDirHist makeHistogram(std::string, const zmq::message_t&) const;
    TypeDirHist getHistogram(zmq::socket_t&,
@@ -62,8 +68,9 @@ private:
 
    zmq::socket_t infoSocket() const {
       zmq::socket_t inf = socket(zmq::REQ);
-      inf.connect(m_infoCon.c_str());
+      zmq::setsockopt(inf, zmq::LINGER, 0);
       zmq::setsockopt(inf, zmq::RCVTIMEO, 50);
+      inf.connect(m_infoCon.c_str());
       return inf;
    }
 
@@ -77,9 +84,9 @@ private:
    double m_rateStart;
    double m_runDuration;
    double m_rateInterval;
+   size_t m_batchSizeProp;
+   size_t m_batchSize = 0;
+   size_t m_hwm;
 
-   std::atomic<bool> m_stopPublishing;
-
-   ROOTHistos m_histos;
 };
 #endif  // HLT2ROOTPUBLISH_H
diff --git a/Online/Hlt2Monitoring/src/component/Hlt2SaverSvc.cpp b/Online/Hlt2Monitoring/src/component/Hlt2SaverSvc.cpp
index 3f6e7ead286ed5eb65dd2677dbd47b3e0778e148..c5518d3a8eaefd5dcd1bbc6b4b48630b18916498 100644
--- a/Online/Hlt2Monitoring/src/component/Hlt2SaverSvc.cpp
+++ b/Online/Hlt2Monitoring/src/component/Hlt2SaverSvc.cpp
@@ -54,12 +54,14 @@ namespace {
    using std::set;
    using std::multimap;
    using std::tuple;
+   using std::stringstream;
 
    using Monitoring::HistDiff;
    using Monitoring::HistoEntry;
    using Monitoring::SaverHistos;
    using Monitoring::ByName;
    using Monitoring::ByRun;
+   using Monitoring::Sorted;
    using Monitoring::WorkMap;
 
    using boost::lexical_cast;
@@ -97,9 +99,9 @@ Hlt2SaverSvc::Hlt2SaverSvc(const string& name, ISvcLocator* loc)
    declareProperty("DataConnection", m_dataCon);
    declareProperty("InfoConnection", m_infoCon);
    declareProperty("TriggerConnection", m_triggerCon = "inproc://Hlt2SaverSvc_trigger");
-   declareProperty("PublishPort", m_pubPort = 0);
    declareProperty("PublishConnection", m_pubCon);
    declareProperty("RegistrarConnection", m_regCon);
+   declareProperty("RegisterInterval", m_regInt = 60);
    declareProperty("BaseDirectory", m_directory);
    declareProperty("SaveInterval", m_saveInterval = 60);
    declareProperty("NormalizeRateTo", m_normalize = "Hlt2RoutingBitsWriter/RoutingBit33");
@@ -108,6 +110,7 @@ Hlt2SaverSvc::Hlt2SaverSvc(const string& name, ISvcLocator* loc)
    declareProperty("UseRunInfoService", m_useInfoSvc = true);
    declareProperty("NWorkers", m_nWorkers = 10);
    declareProperty("RunInfoPollTimeout", m_runInfoPollTime = 0.2);
+   declareProperty("HighWaterMark", m_hwm = 10000);
 }
 
 //===============================================================================
@@ -226,7 +229,80 @@ void Hlt2SaverSvc::saver() {
 }
 
 //===============================================================================
-void Hlt2SaverSvc::function() {
+void Hlt2SaverSvc::registrar(std::string con) const
+{
+   auto internal = zmq().socket(zmq::PAIR);
+   zmq::setsockopt(internal, zmq::LINGER, 0);
+   internal.connect(con.c_str());
+
+   std::vector<zmq::pollitem_t> items(2);
+   items[0] = {internal, 0, zmq::POLLIN, 0};
+
+   auto makePing = [this, &items] {
+      auto ping = zmq().socket(zmq::REQ);
+      zmq::setsockopt(ping, zmq::LINGER, 0);
+      ping.connect(m_regCon.c_str());
+      items[1] = {ping, 0, zmq::POLLIN, 0};
+      return ping;
+   };
+
+   boost::optional<zmq::socket_t> ping;
+
+   bool sentPing = false;
+   bool first = true;
+
+   while (true) {
+      int timeo = sentPing ? 500 : m_regInt * 1000;
+      zmq::poll(&items[0], ping ? 2 : 1, first ? 0 : timeo);
+      if (first) first = false;
+
+      if (items[0].revents & zmq::POLLIN) {
+         auto cmd = receive<std::string>(internal);
+         if (cmd == Monitoring::s_Terminate) {
+            break;
+         } else {
+            warning() << "registar: received bad command message " << cmd << endmsg;
+         }
+      }
+
+      if (!ping) {
+         // attempt to register
+         auto r = registerPublisher();
+         if (r) {
+            send(internal, r->first);
+            send(internal, r->second);
+            ping = makePing();
+         }
+      } else if (sentPing) {
+         sentPing = false;
+         if (items[1].revents & zmq::POLLIN) {
+            auto msg = receive<string>(*ping);
+            if (msg != Monitoring::s_Pong) {
+               warning() << "Bad reply from ping to registrar: " << msg << endmsg;
+            } else {
+               auto app = receive<string>(*ping);
+               if (app != "Registrar") {
+                  warning() << "Received ping reply from " << app
+                            << " while Registrar was expected." << endmsg;
+               }
+            }
+         } else {
+            ping.reset();
+         }
+      } else {
+         // Registered and no ping outstanding: send a ping message. The
+         // ping socket is known to exist here, because the !ping case is
+         // handled by the first branch of this chain; the next poll then
+         // uses the short timeout selected when sentPing is true.
+         sentPing = true;
+         send(*ping, Monitoring::s_Ping);
+      }
+   }
+}
+
+//===============================================================================
+void Hlt2SaverSvc::function()
+{
 
    TThread{};
 
@@ -235,10 +311,13 @@ void Hlt2SaverSvc::function() {
    if (!m_controlConnected) return;
 
    zmq::socket_t data = socket(zmq::SUB);
-   data.connect(m_dataCon.c_str());
+   auto hwm = boost::numeric_cast<int>(m_hwm);
+   zmq::setsockopt(data, zmq::RCVHWM, hwm);
    zmq::setsockopt(data, zmq::SUBSCRIBE, "");
    zmq::setsockopt(data, zmq::LINGER, 0);
-   info() << "Connected data socket to: " << m_dataCon << endmsg;
+   data.connect(m_dataCon.c_str());
+   info() << "Connected data socket to: " << m_dataCon
+          << " with HWM " << hwm << endmsg;
 
    // Clean up queue
    zmq::message_t msg;
@@ -257,7 +336,7 @@ void Hlt2SaverSvc::function() {
       auto workerSocket = socket(zmq::PAIR);
       zmq::setsockopt(workerSocket, zmq::LINGER, 0);
       workerSocket.bind(saveWorkerCon(i).c_str());
-      m_workers.emplace_back(make_tuple(std::thread{[this] (unsigned int i){ saveWorker(i); }, i},
+      m_workers.emplace_back(make_tuple(std::thread{[this](unsigned int i) { saveWorker(i); }, i},
                                         std::move(workerSocket), WorkQueue{}));
    }
 
@@ -270,49 +349,35 @@ void Hlt2SaverSvc::function() {
    std::thread saveThread{[this] { saver(); }};
 
    // publish socket
-   boost::optional<zmq::socket_t> pub;
-   boost::optional<zmq::socket_t> ping;
-   boost::optional<string> pubCon;
-   boost::optional<string> pingCon;
+   boost::optional<zmq::socket_t> pub, ping, reg;
+   boost::optional<std::thread> registrarThread;
+
+   unsigned int pingPort = 0, pubPort = 0;
+   string pingCon, pubCon;
+
+   string intRegCon = "inproc://registrar";
+
    if (!m_pubCon.empty()) {
-      pubCon = m_pubCon;
-   } else if (m_pubPort != 0) {
-      pubCon = "tcp://*:" + to_string(m_pubPort);
-   } else if (!m_regCon.empty()) {
-      auto r = registerPublisher();
-      if (!r) {
-         warning() << "Could not register to obtain port for publication, "
-                   << " not publishing written files." << endmsg;
-      } else {
-         unsigned int pubPort = 0, pingPort = 0;
-         std::tie(pubPort, pingPort) = *r;
-         pubCon = "tcp://*:" + to_string(pubPort);
-         pingCon = "tcp://*:" + to_string(pingPort);
-         debug() << "Registered port " << pubPort << " for publication." << endmsg;
-      }
-   }
-   if (pubCon) {
       pub = zmq().socket(zmq::PUB);
       zmq::setsockopt(*pub, zmq::LINGER, 0);
-      pub->bind(pubCon->c_str());
-      debug() << "Bound publish socket to " << *pubCon << endmsg;
-   }
-   if (pingCon) {
-      ping = zmq().socket(zmq::REP);
-      zmq::setsockopt(*ping, zmq::LINGER, 0);
-      ping->bind(pingCon->c_str());
-      debug() << "Bound publish ping socket to " << *pingCon << endmsg;
+      pub->bind(m_pubCon.c_str());
+      debug() << "Bound publish socket to " << m_pubCon << endmsg;
+   } else if (!m_regCon.empty()) {
+      reg = socket(zmq::PAIR);
+      zmq::setsockopt(*reg, zmq::LINGER, 0);
+      reg->bind(intRegCon.c_str());
+      registrarThread = std::thread{[this, intRegCon]{ registrar(intRegCon); }};
    }
 
    // Initialize poll set
-   std::vector<zmq::pollitem_t> items(ping ? 5 : 4);
-   items.reserve(items.size() +  + m_nWorkers);
+   std::vector<zmq::pollitem_t> items(reg ? 5 : 4);
+   items.reserve(items.size() + m_nWorkers);
    items[0] = {control, 0, zmq::POLLIN, 0};
    items[1] = {data, 0, zmq::POLLIN, 0};
    items[2] = {save, 0, zmq::POLLIN, 0};
    items[3] = {trigger, 0, zmq::POLLIN, 0};
-   if (ping) {
-      items[4] = {*ping, 0, zmq::POLLIN, 0};
+   if (reg) {
+      items[4] = {*reg, 0, zmq::POLLIN, 0};
    }
    for (auto& worker : m_workers) {
       items.push_back({std::get<1>(worker), 0, zmq::POLLIN, 0});
@@ -330,7 +395,7 @@ void Hlt2SaverSvc::function() {
 
    // Remember which workers work on what.
    WorkMap haveWork;
-   decltype(m_workers)::iterator saving;
+   Workers::const_iterator saving = m_workers.cend(); // cend() == no worker is saving
 
    while (!stopping
           || ((doneSaving.size() < m_workers.size())
@@ -345,11 +410,12 @@ void Hlt2SaverSvc::function() {
          auto cmd = receive<std::string>(control);
          if (cmd == Monitoring::s_Terminate) {
             // Save last histograms now.
-            haveWork = saveHistograms(closedRuns);
+            haveWork = saveHistograms(closedRuns, saving);
             std::set<size_t> withWork;
             for (const auto& entry : haveWork) {
                withWork.emplace(entry.first);
             }
+            if (reg) send(*reg, Monitoring::s_Terminate); // reg is only set when registering
             send(save, std::move(withWork), zmq::SNDMORE);
             send(save, true);
             stopping = true;
@@ -393,13 +459,16 @@ void Hlt2SaverSvc::function() {
 
             // Either create a new histogram, or update the existing one.
             if (it == end(range)) {
-               m_histos.insert({key.first, type, dir, histo.release()});
+               m_histos.insert({key.first, type, dir, histo.release(), add});
             } else {
                // For a while, the wrong type was written to files, so update it from the published one
                // if needed.
                if (it->type != type) {
                   hbn.modify(it, [&type](HistoEntry& entry) { entry.type = type; });
                }
+               if (it->add != add) {
+                  hbn.modify(it, [&add](HistoEntry& entry) { entry.add = add; });
+               }
                if (add) {
                   it->histo->Add(histo.get());
                } else {
@@ -419,7 +488,7 @@ void Hlt2SaverSvc::function() {
             auto w = receive<int>(save);
             if (w == -1) {
                // Fill worker queues with work.
-               haveWork = saveHistograms(closedRuns);
+               haveWork = saveHistograms(closedRuns, saving);
 
                // Send worker IDs with work to save thread.
                std::set<size_t> withWork;
@@ -432,10 +501,12 @@ void Hlt2SaverSvc::function() {
                send(save, false);
             } else {
                // Tell worker w to start saving and which runs are closed
-               if (UNLIKELY(msgLevel(MSG::VERBOSE)))
-                  verbose() << "Sending save command to worker " << w << endmsg;
-               send(std::get<1>(m_workers[w]), Monitoring::s_Save);
-               saving = begin(m_workers) + w;
+               if (std::distance(m_workers.cbegin(), saving) != w) {
+                  if (UNLIKELY(msgLevel(MSG::VERBOSE)))
+                     verbose() << "Sending save command to worker " << w << endmsg;
+                  send(std::get<1>(m_workers[w]), Monitoring::s_Save);
+                  saving = m_workers.cbegin() + w;
+               }
             }
          }
       }
@@ -477,9 +548,49 @@ void Hlt2SaverSvc::function() {
          }
       }
 
+      // Setup the ping and pub connections if/when the registrar
+      // thread tells us the ports to use.
+      if (reg && items[4].revents & zmq::POLLIN) {
+         auto p = receive<unsigned int>(*reg);
+         auto pp = receive<unsigned int>(*reg);
+
+         if (p != pubPort) {
+            if (!pubCon.empty() ) {
+               pub->disconnect(pubCon.c_str());
+            }
+
+            pubCon = "tcp://*:" + std::to_string(p);
+            // Setup pub connection
+            pub = zmq().socket(zmq::PUB);
+            zmq::setsockopt(*pub, zmq::LINGER, 0);
+            pub->bind(pubCon.c_str());
+            pubPort = p;
+            debug() << "Bound publish socket to " << pubCon << endmsg;
+         }
+
+         if (pp != pingPort) {
+            if (!pingCon.empty() ) {
+               ping->disconnect(pingCon.c_str());
+            }
+            pingCon = "tcp://*:" + std::to_string(pp);
+
+            // Setup ping connection
+            ping = socket(zmq::REP);
+            zmq::setsockopt(*ping, zmq::LINGER, 0);
+            ping->bind(pingCon.c_str());
+            if (pingPort != 0) {
+               items[5] = {*ping, 0, zmq::POLLIN, 0};
+            } else {
+               items.insert(items.begin() + 5, {*ping, 0, zmq::POLLIN, 0});
+            }
+            pingPort = pp;
+            debug() << "Bound publish ping socket to " << pingCon << endmsg;
+         }
+      }
+
       // Reply to pings from registration server to see if we're
       // alive.
-      if (ping && items[4].revents & zmq::POLLIN) {
+      if (ping && items[5].revents & zmq::POLLIN) {
          auto msg = receive<string>(*ping);
          verbose() << "Received message " << msg << " on ping socket." << endmsg;
          if (msg == Monitoring::s_Ping) {
@@ -571,7 +682,15 @@ void Hlt2SaverSvc::saveWorker(const unsigned int worker) {
          // Copy to the EOR file
          boost::system::error_code ec;
          fs::copy_file(src_file, dest_file, ec);
-         if (ec) verbose() << "Copied file for run " << run << " to " << dest_file.string() << endmsg;
+         if (!ec.value()) {
+            verbose() << "Copied file for run " << run << " to " << dest_file.string() << endmsg;
+         } else {
+            error() << "Failed to copy file for run " << run << " to " << dest_file.string()
+                    << ". Histograms have not been saved. Error from system: " << ec.message()
+                    << ". This indicates a problem with machine "
+                    << Monitoring::hostname() << ", please check with Online."
+                    << endmsg;
+         }
          return ec;
       }
       return false;
@@ -625,7 +744,8 @@ void Hlt2SaverSvc::saveWorker(const unsigned int worker) {
 }
 
 //===============================================================================
-WorkMap Hlt2SaverSvc::saveHistograms(const std::unordered_set<Monitoring::RunNumber>& closed)
+WorkMap Hlt2SaverSvc::saveHistograms(const std::unordered_set<Monitoring::RunNumber>& closed,
+                                     Workers::const_iterator nowSaving)
 {
 
    // Get list of runs for which we have updates
@@ -640,9 +760,14 @@ WorkMap Hlt2SaverSvc::saveHistograms(const std::unordered_set<Monitoring::RunNum
    sizes.reserve(m_workers.size());
 
    // Lambda to find the first worker with the least work to do.
-   auto findWorker = [&sizes](decltype(m_workers)& workers) {
+   auto findWorker = [&sizes, nowSaving](Workers& workers) {
       sizes.clear();
       for (auto it = begin(workers), last = end(workers); it != last; ++it) {
+
+         // Don't use the worker that is now saving for anything
+         if (it == nowSaving) continue;
+
+         // Count the number of entries in the work queue
          const WorkQueue& queue = std::get<2>(*it);
          size_t n = std::count_if(begin(queue), end(queue),
                                   [](const WorkQueue::value_type& entry) {
@@ -689,7 +814,12 @@ WorkMap Hlt2SaverSvc::saveHistograms(const std::unordered_set<Monitoring::RunNum
    while (run != end(runs)) {
       auto saveIt = saving.find(*run);
       if (saveIt != end(saving)) {
-         workOnRun(*run, saveIt->second, closed.count(*run));
+         // Only work on a run if the worker that was already working
+         // on it is not doing so now.
+         if (saveIt->second != nowSaving) {
+            workOnRun(*run, saveIt->second, closed.count(*run));
+         }
+         // Do not dispatch this run to any worker
          run = runs.erase(run);
       } else {
          ++run;
@@ -713,19 +843,29 @@ WorkMap Hlt2SaverSvc::saveHistograms(const std::unordered_set<Monitoring::RunNum
    }
 
 
-   for (decltype(m_workers)::const_iterator worker = begin(m_workers), last = end(m_workers);
+   for (Workers::const_iterator worker = begin(m_workers), last = end(m_workers);
         worker != last; ++worker) {
+      // Skip the worker that is saving right now to avoid sending it
+      // extra save commands.
+      if (worker == nowSaving) continue;
       const auto& queue = std::get<2>(*worker);
       if (queue.empty()) continue;
       size_t w = std::distance(m_workers.cbegin(), worker);
-      debug() << "Worker " << std::right << std::setw(3) << w << " works on runs:";
+      stringstream msg;
+      msg << "Worker " << std::right << std::setw(3) << w << " works on runs:";
       for (const auto& entry : queue) {
          if (!std::get<2>(entry).empty() || std::get<1>(entry)) {
-            debug() << " " << std::get<0>(entry).run;
+            msg << " " << std::get<0>(entry).run;
             haveWork.emplace(w, std::get<0>(entry).run);
          }
       }
-     debug() << endmsg;
+      if (haveWork.count(w)) {
+         info() << msg.str() << endmsg;
+      }
+   }
+
+   if (haveWork.empty()) {
+      info() << "No work to divide." << endmsg;
    }
 
    // Return the set of workers that have received work.
@@ -825,7 +965,7 @@ Hlt2SaverSvc::saveHistograms(const Monitoring::RunInfo& runInfo,
    }
 
    // Loop over histograms for that run
-   for (const auto& entry : histos) {
+   for (const auto& entry : histos.get<Sorted>()) {
       auto histo = entry.histo.get();
       auto dir = entry.dir;
       auto outDir = static_cast<TDirectoryFile*>(outFile.Get(dir.c_str()));
@@ -888,8 +1028,11 @@ Hlt2SaverSvc::saveHistograms(const Monitoring::RunInfo& runInfo,
       // Copy to the saveset
       fs::copy_file(outPath, file, ec);
       if (ec) {
-         warning() << "Could not copy file " << outPath << " to "
-                   << file.string() << endmsg;
+         error() << "Could not copy file " << outPath << " to " << file.string()
+                 << ". Error from system: " << ec.message()
+                 << ". This indicates a problem with machine "
+                 << Monitoring::hostname() << ", please check with Online."
+                 << endmsg;
       } else {
          if (UNLIKELY(msgLevel(MSG::DEBUG)))
             debug() << "Saved histograms for run " << runInfo.run << " to "
@@ -908,7 +1051,11 @@ Hlt2SaverSvc::saveHistograms(const Monitoring::RunInfo& runInfo,
    bool success = fs::remove(outPath, ec);
    success &= !ec;
    if (!success) {
-      warning() << "Could not remove file " << outPath.string() << endmsg;
+      error() << "Could not remove file " << outPath.string()
+              << ". Error from system: " << ec.message()
+              << ". This indicates a problem with machine "
+              << Monitoring::hostname() << ", please check with Online."
+              << endmsg;
    }
    return make_tuple(closed, file.string(), fileByRun.string());
 }
@@ -1056,7 +1203,11 @@ std::pair<fs::path, bool> Hlt2SaverSvc::filename(const Monitoring::RunInfo& runI
       bool success = fs::create_directories(directory, ec);
       success &= !ec;
       if (!success) {
-         warning() << "Failed to create directory " << directory << endmsg;
+         error() << "Failed to create directory " << directory
+                 << ". Histograms will not be saved. Error from system: "
+                 << ec.message() << ". This indicates a problem with machine "
+                 << Monitoring::hostname() << ", please check with Online."
+                 << endmsg;
          return make_pair(directory, false);
       }
    }
diff --git a/Online/Hlt2Monitoring/src/component/Hlt2SaverSvc.h b/Online/Hlt2Monitoring/src/component/Hlt2SaverSvc.h
index 419b216dbc7545f8ceee3b0e118ad0c0e3ff297f..62ffb1f374a9eb653bbf422a3cd3acb68cb88500 100644
--- a/Online/Hlt2Monitoring/src/component/Hlt2SaverSvc.h
+++ b/Online/Hlt2Monitoring/src/component/Hlt2SaverSvc.h
@@ -29,6 +29,11 @@
 class Hlt2SaverSvc : public Hlt2MonBaseSvc {
 public:
 
+   // Add boolean to indicate closing
+   using WorkQueue = std::vector<std::tuple<Monitoring::RunInfo, bool, Monitoring::SaverHistos>>;
+   using Worker = std::tuple<std::thread, zmq::socket_t, WorkQueue>;
+   using Workers = std::vector<Worker>;
+
    /// Standard constructor
    Hlt2SaverSvc(const std::string& name, ISvcLocator* sl);
 
@@ -39,16 +44,15 @@ public:
 
 private:
 
-   // Add boolean to indicate closing
-   using WorkQueue = std::vector<std::tuple<Monitoring::RunInfo, bool, Monitoring::SaverHistos>>;
-   using Worker = std::tuple<std::thread, zmq::socket_t, WorkQueue>;
-   using Workers = std::vector<Worker>;
-
    // Function used by thread to trigger saving of histograms
    void saver();
 
+   // Function used by thread to communicate with the registrar
+   void registrar(std::string con) const;
+
    // Save all known histograms to file
-   Monitoring::WorkMap saveHistograms(const std::unordered_set<Monitoring::RunNumber>& closed);
+   Monitoring::WorkMap saveHistograms(const std::unordered_set<Monitoring::RunNumber>& closed,
+                                      Workers::const_iterator nowSaving);
 
    // Save all histograms of a single run to file
    std::tuple<bool, std::string, std::string>
@@ -94,9 +98,9 @@ private:
    std::string m_dataCon;
    std::string m_infoCon;
    std::string m_triggerCon;
-   unsigned int m_pubPort;
    std::string m_pubCon;
    std::string m_regCon;
+   int m_regInt;
    std::string m_normalize;
    std::string m_application;
    int m_saveInterval;
@@ -104,6 +108,7 @@ private:
    bool m_useInfoSvc;
    double m_runInfoPollTime;
    std::string m_runInfoType;
+   size_t m_hwm;
 
    // Data members
    std::atomic<bool> m_stopSaving;
diff --git a/Online/Hlt2Monitoring/src/component/ZmqTransmitterSvc.cpp b/Online/Hlt2Monitoring/src/component/ZmqTransmitterSvc.cpp
index a04c1d162c2b0894e8f73e21fe38c3191ff751a3..3d5bdfc5e914c6c9d2ff53f04ee1033656c05bfa 100644
--- a/Online/Hlt2Monitoring/src/component/ZmqTransmitterSvc.cpp
+++ b/Online/Hlt2Monitoring/src/component/ZmqTransmitterSvc.cpp
@@ -1,4 +1,3 @@
-// Include files
 #include <string>
 #include <tuple>
 #include <vector>
@@ -8,8 +7,8 @@
 #include <thread>
 #include <unordered_map>
 #include <sstream>
+#include <chrono>
 
-// boost
 #include <boost/regex.hpp>
 #include <boost/archive/text_oarchive.hpp>
 #include <boost/numeric/conversion/cast.hpp>
@@ -17,17 +16,14 @@
 #include <boost/optional.hpp>
 #include <boost/functional/hash.hpp>
 
-// Gaudi
 #include <GaudiKernel/ParsersFactory.h>
 
-// ZeroMQ
 #include <ZeroMQ/IZeroMQSvc.h>
 
-// Hlt2Monitoring
 #include <Hlt2Monitoring/Utilities.h>
 #include <Hlt2Monitoring/Serialize.h>
+#include <Hlt2Monitoring/Types.h>
 
-// local
 #include "ZmqTransmitterSvc.h"
 
 //-----------------------------------------------------------------------------
@@ -66,6 +62,7 @@ namespace {
    namespace fs = boost::filesystem;
 
    using ms = chr::duration<double, std::milli>;
+   using namespace std::chrono_literals;
 
 #if __cplusplus <= 201103L
    //TODO: this  adds C++14 'make_unique'... remove once we move to C++14...
@@ -81,13 +78,13 @@ namespace {
 //=============================================================================
 ZmqTransmitterSvc::ZmqTransmitterSvc(const string& name,
                                      ISvcLocator* pSvcLocator)
-   : base_class (name , pSvcLocator),
-     m_internalConCounter{1}
+   : base_class (name , pSvcLocator)
 {
    declareProperty("HostnameRegex", m_hostRegex = "hlt(?<subfarm>[a-f]{1}[0-9]{2})(?<node>[0-9]{2})?");
    declareProperty("Application", m_application = {"Moore2", "v1r0"});
    declareProperty("InfoPort", m_infoPort = 31339);
    declareProperty("IPCConnectionPath", m_connectionPath = "/run/HLT2");
+   declareProperty("MaxMonRestart", m_maxMonRestart = 0);
 }
 
 //=============================================================================
@@ -105,9 +102,11 @@ StatusCode ZmqTransmitterSvc::initialize()
    if (!m_zmqSvc) {
       fatal() << "ZeroMQSvc not found" << endmsg;
       return StatusCode::FAILURE;
-   }
+   }   
 
    // Create directories needed for ipc connections.
+   debug() << "Checking for existence of " << m_connectionPath
+           << " and creating if needed." << endmsg;
    fs::path p(m_connectionPath);
    if (!fs::exists(p)) {
       boost::system::error_code ec;
@@ -123,6 +122,82 @@ StatusCode ZmqTransmitterSvc::initialize()
    return sc;
 }
 
+//=============================================================================
+void ZmqTransmitterSvc::transmitWrapper()
+{
+   auto internal = internalSocket(internalCon("internal"), false);
+   auto checkCon = internalCon("check");
+   auto check = internalSocket(checkCon, true, {{zmq::SNDTIMEO, 100}});
+   auto pubCon = internalCon("publish");
+   auto pub = internalSocket(pubCon, true, {{zmq::SNDTIMEO, 100}});
+   auto debugCon = internalCon("debug");
+
+   std::exception_ptr checkException{nullptr};
+   std::thread checkThread{[this, checkCon, debugCon, &checkException] {
+         try {
+            sendCheck(checkCon, debugCon);
+         } catch (const zmq::error_t& e) {
+            error() << "Check caught unhandled 0MQ exception: " << e.what() << endmsg;
+            checkException = std::current_exception();
+         } catch(const std::exception& e) {
+            error() << "Check caught unhandled std exception: " << e.what() << endmsg;
+            checkException = std::current_exception();
+         } catch (...) {
+            error() << "Check caught unhandled other exception." << endmsg;
+            checkException = std::current_exception();
+         }
+      }};
+
+   std::exception_ptr pubException{nullptr};
+   std::thread pubThread{[this, pubCon, &pubException] {
+         try {
+            publish(pubCon);
+         } catch (const zmq::error_t& e) {
+            error() << "Publish caught unhandled 0MQ exception: " << e.what() << endmsg;
+            pubException = std::current_exception();
+         } catch (const std::exception& e) {
+            error() << "Publish caught unhandled std exception: " << e.what() << endmsg;
+            pubException = std::current_exception();
+         } catch (...) {
+            error() << "Publish caught unhandled other exception." << endmsg;
+            pubException = std::current_exception();
+         }
+      }};
+
+   try {
+      transmit(internal, check, pub, pubException, checkException);
+   } catch (const zmq::error_t& e) {
+      error() << "Transmit caught unhandled 0MQ exception: " << e.what() << endmsg;
+      m_transmitException = std::current_exception();
+   } catch (const std::exception& e) {
+      error() << "Transmit caught unhandled std exception: " << e.what() << endmsg;
+      m_transmitException = std::current_exception();
+   } catch (...) {
+      error() << "Transmit caught unhandled other exception." << endmsg;
+      m_transmitException = std::current_exception();
+   }
+
+   try {
+      if (!checkException) {
+         zmq().send(check, Monitoring::s_Command, zmq::SNDMORE);
+         zmq().send(check, Monitoring::s_Terminate);
+      }
+      if (!pubException) {
+         zmq().send(pub, Monitoring::s_Terminate);
+      }
+   } catch (const zmq::error_t& e) {
+      error() << "Caught zmq error while exiting: " << e.what() << endmsg;
+   }
+   checkThread.join();
+   pubThread.join();
+
+   if (pubException) {
+      m_transmitException = pubException;
+   } else if (checkException) {
+      m_transmitException = checkException;
+   }
+}
+
 //=============================================================================
 void ZmqTransmitterSvc::setup()
 {
@@ -153,7 +228,24 @@ void ZmqTransmitterSvc::setup()
    }
 
    if (!m_thread) {
-      m_thread = make_unique<std::thread>([this]{ transmit(); });
+      m_thread = make_unique<std::thread>([this]{
+            // Run the monitoring loop; after a failure restart it, at most m_maxMonRestart times.
+            unsigned int tries = 0;
+            while (tries <= m_maxMonRestart) {
+               // Forget the previous attempt's failure; otherwise a clean exit
+               // after a restart would still be treated as a failure.
+               m_transmitException = nullptr;
+               transmitWrapper();
+               if (!m_transmitException) break;
+               warning() << "Restarting monitoring thread try " << tries << endmsg;
+               std::this_thread::sleep_for(1s);
+               if (++tries > m_maxMonRestart) {
+                  error() << "Maximum number monitoring restarts "
+                          << "reached, giving up " << endmsg;
+               }
+            }
+            m_ok = false;
+         });
    }
 }
 
@@ -167,8 +259,10 @@ StatusCode ZmqTransmitterSvc::start() {
 StatusCode ZmqTransmitterSvc::finalize()
 {
    if (m_thread) {
-      zmq().send(*m_internal, Monitoring::s_Command, zmq::SNDMORE);
-      zmq().send(*m_internal, Monitoring::s_Terminate);
+      if (!m_transmitException) {
+         zmq().send(*m_internal, Monitoring::s_Command, zmq::SNDMORE);
+         zmq().send(*m_internal, Monitoring::s_Terminate);
+      }
       m_thread->join();
    }
    m_internal.reset();
@@ -183,7 +277,7 @@ ZmqTransmitterSvc::outputSocket(size_t nMsg, size_t hwm,
                                 const std::string forwardType,
                                 const unsigned int sourceID,
                                 const bool checkEverySend,
-                                const unsigned int interval)
+                                const unsigned int i)
 {
    // If we haven't started yet, but one of our users is in start,
    // we're still good to go.
@@ -197,14 +291,19 @@ ZmqTransmitterSvc::outputSocket(size_t nMsg, size_t hwm,
    boost::hash<pair<string, unsigned int>> hasher{};
    size_t id = hasher(make_pair(forwardType, count));
 
+   // The batch size for sending is chosen so that all credit (and buffered
+   // messages) goes out in about 10 batches, presumably one per second, so a
+   // non-zero interval shorter than 12 seconds is raised to 12; 0 is kept as is.
+   auto interval = (i != 0 && i < 12) ? 12 : i;
+
    debug() << "Registering forward type: " << forwardType << endmsg
            << "                      id: " << id << endmsg
            << "              connection: " << outputInfo.second << endmsg
            << "            message size: " << nMsg << endmsg
            << "         high water mark: " << hwm << endmsg
+           << "        check every send: " << checkEverySend << endmsg
            << "                interval: " << interval << endmsg;
 
-
    Transmitter::Forwarder forward{nMsg, hwm, std::move(outputInfo),
                                   internalCon("internal", count),
                                   std::move(forwardType), sourceID,
@@ -212,6 +311,7 @@ ZmqTransmitterSvc::outputSocket(size_t nMsg, size_t hwm,
 
    auto internal = zmq().socket(zmq::PAIR);
    zmq::setsockopt(internal, zmq::LINGER, 0);
+   zmq::setsockopt(internal, zmq::SNDTIMEO, 100);
    internal.bind(forward.internalCon.c_str());
 
    zmq().send(*m_internal, Monitoring::s_Command, zmq::SNDMORE);
@@ -227,27 +327,58 @@ ZmqTransmitterSvc::outputSocket(size_t nMsg, size_t hwm,
    return r;
 }
 
+//===============================================================================
+void ZmqTransmitterSvc::publish(const string pubCon) const {
+
+   auto internal = zmq().socket(zmq::PAIR);
+   zmq::setsockopt(internal, zmq::LINGER, 0);
+   internal.connect(pubCon.c_str());
+
+   zmq::pollitem_t items [] = {
+      { internal, 0, ZMQ_POLLIN, 0 },
+   };
+
+   while(true) {
+      zmq::poll(&items[0], 1, 1000);
+      if (items[0].revents & ZMQ_POLLIN) {
+         auto msg = zmq().receive<string>(internal);
+         if (msg == Monitoring::s_Terminate) {
+            break;
+         } else {
+            warning() << "Publish thread got unknown message: "
+                      << msg << endmsg;
+         }
+      }
+      zmq().send(internal, Monitoring::s_Publish);
+   }
+}
+
 //===============================================================================
 void ZmqTransmitterSvc::sendCheck(const string checkCon, const string debugCon) const {
 
    auto dbg = zmq().socket(zmq::PAIR);
    zmq::setsockopt(dbg, zmq::LINGER, 0);
+   zmq::setsockopt(dbg, zmq::SNDTIMEO, 100);
    dbg.connect(debugCon.c_str());
 
    auto sendDebug = [this, &dbg](string message) {
-      zmq().send(dbg, message);
+      try {
+         zmq().send(dbg, message);
+      } catch (const zmq::error_t&) {
+         debug() << message << endmsg;
+      }
    };
 
    auto internal = zmq().socket(zmq::PAIR);
    zmq::setsockopt(internal, zmq::LINGER, 0);
    internal.connect(checkCon.c_str());
-   sendDebug("sendCheck: connected internal connection to " + checkCon);
+   sendDebug("connected internal connection to " + checkCon);
 
    auto trigger = zmq().socket(zmq::SUB);
    zmq::setsockopt(trigger, zmq::LINGER, 0);
    trigger.connect(m_triggerCon.c_str());
    zmq::setsockopt(trigger, zmq::SUBSCRIBE, "");
-   sendDebug("sendCheck: connected trigger connection to " + m_triggerCon);
+   sendDebug("connected trigger connection to " + m_triggerCon);
 
    zmq::pollitem_t items [] = {
       { internal, 0, ZMQ_POLLIN, 0 },
@@ -258,7 +389,9 @@ void ZmqTransmitterSvc::sendCheck(const string checkCon, const string debugCon)
    using KeyHash = boost::hash<Key>;
    // Map of type for which we are checking to {type connected, check interval for type}
    unordered_map<Key, pair<bool, unsigned int>, KeyHash> checkInfo;
-
+   unordered_map<Key, bool, KeyHash> checking;
+   
+   
    bool stop = false;
    unordered_map<size_t, string> id2Type;
 
@@ -337,28 +470,24 @@ void ZmqTransmitterSvc::sendCheck(const string checkCon, const string debugCon)
       }
    }
 
-   auto checkAgain = [&intervals, &checkInfo, maxTries](const tuple<string, size_t, bool>& t) {
+   auto checkAgain = [&intervals, &checkInfo, &checking, maxTries](const tuple<string, size_t, bool>& t) {
       Key key{std::get<0>(t), std::get<1>(t)};
-      // Start checking this forwardType again.
-      auto it = intervals.find(key);
-      if (it == end(intervals)) {
-         // If we were not yet checking, start now with the registered interval
-         // if the registered interval is 0, it means no regular check, so force a
-         // check by setting the interval to 1.
-         auto interval = checkInfo[key].second != 0 ? checkInfo[key].second : 1;
-         intervals.emplace(key, make_pair(interval, maxTries));
-      } else if (checkInfo[key].second != 0) {
-         // If we were already checking, extend the interval back to the original
-         it->second.first = checkInfo[key].second;
+      // Check this forward type now.
+      auto it = checking.find(key);
+      if (it == end(checking) || !it->second) {
+         checking[key] = false;
+         intervals[key] = make_pair(500, maxTries);
       }
    };
 
-   auto shouldCheck = [](const decltype(intervals)::value_type& entry) {
-      return entry.second.first <= 0 && entry.second.second > 0;
+   auto shouldCheck = [&checking](const decltype(intervals)::value_type& entry) {
+      return !checking[entry.first] && entry.second.first <= 0 && entry.second.second > 0;
    };
 
    vector<tuple<string, size_t, bool>> connected;
    std::set<Key> tried;
+   double timeout = 999;
+
 
    while (!stop) {
       double diff = 0.;
@@ -367,61 +496,43 @@ void ZmqTransmitterSvc::sendCheck(const string checkCon, const string debugCon)
          if (interval.second.first > 0) interval.second.first -= diff;
       }
 
-      // if (msgLevel(MSG::DEBUG)) {
-      //    stringstream s;
-      //    s << "Poll returned: " << connected.size();
-      //    for (size_t i = 0; i < connected.size(); ++i) {
-      //       s << " " << i << " " << connected[i].first << " " << connected[i].second;
-      //    }
-      //    s << " " << diff;
-      //    sendDebug(s.str());
-      //    sendDebug(string{"stop: "} + to_string(stop));
-      //    s.str(string{});
-
-      //    s << "Intervals:";
-      //    for (const auto& entry : intervals) {
-      //       s << " " << entry.first << " " << entry.second.first << " " << entry.second.second;
-      //    }
-      //    sendDebug(s.str());
-      //    s.str(string{});
-
-      //    s << "Tried:";
-      //    for (const auto& entry : tried) {
-      //       s << " " << entry;
-      //    }
-      //    sendDebug(s.str());
-      //    s.str(string{});
-      // }
-
-      if (stop) break;
+      if (UNLIKELY(msgLevel(MSG::VERBOSE))) {
+         stringstream s;
+         s << "Poll returned: " << connected.size();
+         for (size_t i = 0; i < connected.size(); ++i) {
+            s << " " << i << " " << std::get<0>(connected[i]) << " "
+              << std::get<1>(connected[i]) << " " << std::get<2>(connected[i]);
+         }
+         s << " " << diff;
+         sendDebug(s.str());
+         sendDebug(string{"stop: "} + to_string(stop));
+         s.str(string{});
+
+         s << "Intervals:";
+         for (const auto& entry : intervals) {
+            s << " " << entry.first.first << " " << entry.first.second
+              << " " << entry.second.first << " " << entry.second.second;
+         }
+         sendDebug(s.str());
+         s.str(string{});
 
-      for (const auto& entry : connected) {
-         if (!std::get<0>(entry).empty() and !std::get<2>(entry)) {
-            checkAgain(entry);
+         s << "Tried:";
+         for (const auto& entry : tried) {
+            s << " " << entry.first << " " << entry.second;
          }
-      }
+         sendDebug(s.str());
+         s.str(string{});
 
-      // If nobody was connected, reduce all trial counts by 1 and remove those that reached the max tried.
-      if (none_of(begin(connected), end(connected), [](const decltype(connected)::value_type& entry) {
-               return std::get<2>(entry);
-            })) {
-         auto tr = tried.begin();
-         while(tr != end(tried)) {
-            auto it = intervals.find(*tr);
-            if (it != end(intervals)) {
-               --(it->second.second);
-               if (it->second.second == 0) {
-                  tr = tried.erase(tr);
-                  intervals.erase(it);
-               } else {
-                  ++tr;
-               }
-            } else {
-               ++tr;
-            }
+         s << "Checking:";
+         for (const auto& entry : checking) {
+            s << " " << entry.first.first << " " << entry.first.second << " "
+              << entry.second;
          }
+         sendDebug(s.str());
       }
 
+      if (stop) break;
+
       for (const auto& entry : connected) {
          if (std::get<0>(entry).empty()) continue;
          Key key{std::get<0>(entry), std::get<1>(entry)};
@@ -434,9 +545,9 @@ void ZmqTransmitterSvc::sendCheck(const string checkCon, const string debugCon)
          }
 
          if (std::get<2>(entry)) {
-            Key key{std::get<0>(entry), std::get<1>(entry)};
             tried.erase(key);
-
+            checking[key] = false;
+            
             if (!it->second.first) {
                it->second.first = true;
 
@@ -451,63 +562,91 @@ void ZmqTransmitterSvc::sendCheck(const string checkCon, const string debugCon)
          }
       }
 
+      for (const auto& entry : connected) {
+         if (!std::get<0>(entry).empty() && !std::get<2>(entry)) {
+            checkAgain(entry);
+         }
+      }
+
+      timeout -= diff;
+
+      if (timeout > 0) continue;
+      
+      timeout = 999;
+
+      // If nobody was connected, reduce all trial counts by 1 and remove those that reached the max tried.
+      // No test on timeout here: it was re-armed to 999 just above, so `timeout < 0` could never hold.
+      if (none_of(begin(connected), end(connected), [](const decltype(connected)::value_type& entry) {
+               return std::get<2>(entry);
+            })) {
+         for (auto tr = tried.begin(); tr != end(tried);) {
+            auto it = intervals.find(*tr);
+            if (it != end(intervals)) {
+               if (--(it->second.second) == 0) {
+                  // Clear the flag while *tr is still valid: erase() invalidates it and may return end().
+                  checking[*tr] = false;
+                  intervals.erase(it);
+                  tr = tried.erase(tr);
+               } else {
+                  ++tr;
+               }
+            } else {
+               ++tr;
+            }
+         }
+      }
+      
       // Find out which ones should be checked for connectivity
       for (auto& interval : intervals) {
          if (!shouldCheck(interval)) continue;
 
-         // Flag that we are no longer connected
+         // Flag that we are no longer connected and checking
+         checking[interval.first] = true;
          auto it = checkInfo.find(interval.first);
-         if (it != end(checkInfo)) it->second.first = false;
+         if (it != end(checkInfo)) {
+            it->second.first = false;
+         }
+         
+         stringstream s;
+         s << "Sending check message for type " << interval.first.first << " " << interval.first.second;
+         sendDebug(string{s.str()});
 
          // Send check message
          zmq().send(internal, Monitoring::s_Publish, zmq::SNDMORE);
          zmq().send(internal, interval.first.first, zmq::SNDMORE);
          zmq().send(internal, interval.first.second);
+
          tried.insert(interval.first);
       }
    }
 }
 
 //===============================================================================
-void ZmqTransmitterSvc::transmit() const {
+void ZmqTransmitterSvc::transmit(zmq::socket_t& internal, zmq::socket_t& check,
+                                 zmq::socket_t& pub, 
+                                 std::exception_ptr& pubException,
+                                 std::exception_ptr& checkException) const {
 
    boost::regex reInfoSvc{"tcp://\\*(:[0-9]+)"};
 
    std::unordered_map<size_t, zmq::socket_t> dataInConnections;
    std::unordered_map<std::string, std::pair<size_t, zmq::socket_t>> svcConnections;
 
-   // check if our info service is tcp, such that we can create the right
-   // connection string for the remote check sender to connect to.
-
-   using Options = vector<pair<zmq::SocketOptions, int>>;
-   auto internalSocket = [this](string con, bool bind = true,
-                                Options options = Options{}) -> zmq:: socket_t{
-      auto s = zmq().socket(zmq::PAIR);
-      zmq::setsockopt(s, zmq::LINGER, 0);
-      for (const auto& e : options) zmq::setsockopt(s, e.first, e.second);
-      bind ? s.bind(con.c_str()) : s.connect(con.c_str());
-      return s;
-   };
-
-   auto internal = internalSocket(internalCon("internal"), false);
-   auto checkCon = internalCon("check");
-   auto check = internalSocket(checkCon, true, {{zmq::SNDTIMEO, 100}});
    auto debugCon = internalCon("debug");
    auto dbg = internalSocket(debugCon, true);
 
-   std::thread checkThread{[this, checkCon, debugCon] {
-         sendCheck(checkCon, debugCon);
-      }};
-
    std::vector<zmq::pollitem_t> items;
    items.push_back({internal, 0, ZMQ_POLLIN, 0});
    items.push_back({check, 0, ZMQ_POLLIN, 0});
+   items.push_back({pub, 0, ZMQ_POLLIN, 0});
    items.push_back({dbg, 0, ZMQ_POLLIN, 0});
 
    using Messages = vector<zmq::message_t>;
    // key:   forward type
-   // value: (JobInfo, output socket, messages to be send, connected, checking)
-   using InfoTuple = tuple<Monitoring::JobInfo, zmq::socket_t, deque<Messages>, bool, bool>;
+   // value: (JobInfo, output socket, messages to be send, connected, checking,
+   //         credit, batchSize, number of publishes)
+   using InfoTuple = tuple<Monitoring::JobInfo, zmq::socket_t, deque<Messages>, bool, bool,
+                           size_t, size_t, size_t>;
    using Key = pair<string, size_t>;
    using Infos = unordered_map<Key, InfoTuple, boost::hash<Key>>;
    Infos infos;
@@ -517,24 +656,19 @@ void ZmqTransmitterSvc::transmit() const {
    Forwarders forwarders;
 
    auto sendCheckMsg = [this] (zmq::socket_t& output, const Monitoring::JobInfo& jobInfo) {
-      // Send our connection state to the check thread
-      debug() << "Sending check message to request reply to " << jobInfo.connection << endmsg;
+      if (UNLIKELY(msgLevel(MSG::DEBUG))) {
+         debug() << "Sending check message to request reply to " << jobInfo.connection << endmsg;
+      }
       zmq().send(output, Monitoring::s_Check, zmq::SNDMORE);
       zmq().send(output, jobInfo);
    };
 
-   auto sendMessage = [this](zmq::socket_t& output, Messages& msgs) {
-      size_t n = msgs.size();
-      for (unsigned int i = 0; i < n; ++i) {
-         zmq().send(output, msgs[i], (i < (n - 1)) ? zmq::SNDMORE : 0);
-      }
-   };
-
    // Output socket factory lambda
    auto outputFactory = [this](zmq::SocketTypes type, const std::string con) {
       auto output = [this, con, type] {
          zmq::socket_t s = zmq().socket(type);
          zmq::setsockopt(s, zmq::LINGER, 0);
+         zmq::setsockopt(s, zmq::SNDHWM, 10000);
          s.connect(con.c_str());
          return s;
       };
@@ -550,18 +684,17 @@ void ZmqTransmitterSvc::transmit() const {
    std::unordered_map<Key, bool, boost::hash<Key>> recreated;
    auto checkRecreated = [&recreated](const Key& k) { return recreated.count(k) && recreated[k]; };
 
+   m_ok = true;
+
    while (true) {
       //  Process messages from all sockets
       zmq::poll (&items[0], items.size(), 1000);
       for (const auto& entry : infos) recreated[entry.first] = false;
 
       if (items[0].revents & ZMQ_POLLIN) {
-         auto typeMsg = zmq().receive<zmq::message_t>(internal);
-         auto type = zmq().decode<string>(typeMsg);
+         auto type = zmq().receive<string>(internal);
          if (type == Monitoring::s_Command) {
-            auto cmdMsg = zmq().receive<zmq::message_t>(internal);
-            auto cmd = zmq().decode<string>(cmdMsg);
-            zmq().send(check, typeMsg, zmq::SNDMORE);
+            auto cmd = zmq().receive<string>(internal);
             if (cmd == Monitoring::s_Register) {
                // If a register command is received, add that forward destination
                // Receive the forwarder information
@@ -605,14 +738,16 @@ void ZmqTransmitterSvc::transmit() const {
                             << forwarder.internalCon.c_str() << endmsg;
                }
                items.push_back({r.first->second, 0, ZMQ_POLLIN, 0});
-               InfoTuple t = make_tuple(std::move(info), makeOutput(), deque<Messages>{}, false, true);
+               InfoTuple t = make_tuple(std::move(info), makeOutput(), deque<Messages>{}, false, true,
+                                        0, 0, 0);
                infos.emplace(make_pair(ft, id), std::move(t));
 
                // Store the forwarder info
                forwarders.emplace(make_pair(ft, id), make_pair(std::move(forwarder), std::move(makeOutput)));
 
                // Finally forward to the check thread
-               zmq().send(check, cmdMsg, zmq::SNDMORE);
+               zmq().send(check, type, zmq::SNDMORE);
+               zmq().send(check, cmd, zmq::SNDMORE);
                zmq().send(check, ft, zmq::SNDMORE);
                zmq().send(check, id, zmq::SNDMORE);
                zmq().send(check, interval);
@@ -621,10 +756,10 @@ void ZmqTransmitterSvc::transmit() const {
                debug() << "Check thread reports " << (success ? "" : "un")
                        << "successful registration of forwarder of type " << ft
                        << " with interval " << interval << endmsg;
+
                // send result of registration to outputSocket call
                zmq().send(internal, success);
             } else if (cmd == Monitoring::s_Terminate) {
-               zmq().send(check, cmdMsg);
                break;
             }
          }
@@ -642,68 +777,80 @@ void ZmqTransmitterSvc::transmit() const {
                       << " with id " << id << " and message type " << msgType << endmsg;
          } else if (msgType == Monitoring::s_Publish) {
             // If we are not checking for a connection, we've been connected before, but we want to recheck.
-            debug() << "Received publish message for forward type: " << forwardType
-                    << ", id: " << id << ", connected: " << infoConnected(it->second)
-                    << ", checking: " << infoChecking(it->second) << endmsg;
-            if (!infoChecking(it->second)) {
+            auto checking = infoChecking(it->second);
+            if (!checking) {
                infoConnected(it->second) = false;
                infoChecking(it->second) = true;
             }
             // If we are not connected, send a check message
             if (!infoConnected(it->second)) {
+               if (UNLIKELY(msgLevel(MSG::DEBUG))) {
+                  debug() << "Sending check message for forward type: "
+                          << forwardType << ", id: " << id << endmsg;
+               }
                sendCheckMsg(dataOut(it->second), jobInfo(it->second));
             }
          }
       }
 
-      // Debug messages from check thread
-      if (items[2].revents & ZMQ_POLLIN) {
-         debug() << "sendCheck: " << zmq().receive<string>(dbg) << endmsg;
-      }
-
       // Loop over data input connections to process messages
       for (auto& entry : dataInConnections) {
          if (items[entry.first].revents & ZMQ_POLLIN) {
             bool more = true;
-            auto forwardTypeMsg = zmq().receive<zmq::message_t>(entry.second, &more);
-            auto forwardType = zmq().decode<string>(forwardTypeMsg);
+            auto forwardType = zmq().receive<string>(entry.second, &more);
             boost::optional<size_t> id;
             Key key;
-            auto it = end(forwarders);
-            if (more) {
-               id = zmq().receive<size_t>(entry.second, &more);
+
+            if (forwardType == Monitoring::s_Check) {
+               forwardType = zmq().receive<string>(entry.second);
+               id = zmq().receive<size_t>(entry.second);
                key = Key{forwardType, *id};
-               verbose() << "Received data in message for forward type " << forwardType
-                         << " id " << *id << endmsg;
-               it = forwarders.find(key);
-            } else {
-               error() << "Received data in message of only size 1" << endmsg;
-            }
-            if (it != end(forwarders)) {
-               Transmitter::Forwarder& forward = it->second.first;
-               Messages msgs;
-               msgs.reserve(forward.nMsg);
-               msgs.emplace_back(std::move(forwardTypeMsg));
-               for (unsigned int i = 1; i < forward.nMsg; ++i) {
-                  if (!more) {
-                     throw ZMQ::MoreException{};
-                  }
-                  msgs.emplace_back(zmq().receive<zmq::message_t>(entry.second, &more));
-               }
-               // If we are not connected and we are above the high water mark, throw some data away.
                auto infoIt = infos.find(key);
-               auto& messages = infoMessages(infoIt->second);
-               if (messages.size() > forward.hwm) {
-                  messages.pop_front();
+               if (infoIt != end(infos) && infoConnected(infoIt->second)) {
+                  // The queue can hold hwm + 1 entries: clamp so the unsigned subtraction cannot wrap.
+                  const size_t hwm = forwarders[key].first.hwm, queued = infoMessages(infoIt->second).size();
+                  size_t credit = hwm > queued ? hwm - queued : 0, batchSize = std::get<6>(infoIt->second);
+                  zmq().send(entry.second, credit < batchSize ? credit : batchSize);
+               } else {
+                  zmq().send(entry.second, 0);
+               }
-               if (!infoConnected(infoIt->second)) {
-                  messages.emplace_back(std::move(msgs));
+            } else {
+               auto it = end(forwarders);
+               if (more) {
+                  id = zmq().receive<size_t>(entry.second, &more);
+                  key = Key{forwardType, *id};
+                  it = forwarders.find(key);
                } else {
-                  sendMessage(dataOut(infoIt->second), msgs);
+                  error() << "Received data in message of only size 1" << endmsg;
+               }
+               if (it != end(forwarders)) {
+                  Transmitter::Forwarder& forward = it->second.first;
+                  Messages msgs;
+                  msgs.reserve(forward.nMsg);
+                  msgs.emplace_back(zmq().encode(forwardType));
+                  for (unsigned int i = 1; i < forward.nMsg; ++i) {
+                     if (!more) {
+                        throw ZMQ::MoreException{};
+                     }
+                     msgs.emplace_back(zmq().receive<zmq::message_t>(entry.second, &more));
+                  }
+                  if (more) {
+                     error() << "Received too many messages for forward type: " << forwardType << endmsg;
+                  }
+                  // If we are not connected and we are above the high water mark, throw some data away.
+                  auto infoIt = infos.find(key);
+                  auto& messages = infoMessages(infoIt->second);
+                  if (messages.size() > forward.hwm) {
+                     if (UNLIKELY(msgLevel(MSG::DEBUG))) {
+                        debug() << "Removing buffered message, above HWM of " << forward.hwm << endmsg;
+                     }
+                     messages.pop_front();
+                  }
+                  messages.emplace_back(std::move(msgs));
+               } else if (id) {
+                  warning() << "Received forward message for unknown forward type: " << forwardType
+                            << " id: " << *id << endmsg;
                }
-            } else if (id) {
-               warning() << "Received forward message for unknown forward type: " << forwardType
-                         << " id: " << *id << endmsg;
             }
          }
       }
@@ -717,8 +864,10 @@ void ZmqTransmitterSvc::transmit() const {
             if (msgType == Monitoring::s_Check) {
                auto forwardType = zmq().receive<string>(svcSocket);
                auto id = zmq().receive<size_t>(svcSocket);
-               debug() << "Got reply to check message on service connection " << entry.second.first
-                       << " for type " << forwardType << " id " << id << endmsg;
+               if (UNLIKELY(msgLevel(MSG::DEBUG))) {
+                  debug() << "Got reply to check message on service connection " << entry.second.first
+                          << " for type " << forwardType << " id " << id << endmsg;
+               }
                // We got a reply to our check message in the form or a check request, so we are connected and
                // can stop checking. Reply with our service connection so the other side can check that it
                // sent the request to the right connection.
@@ -728,6 +877,11 @@ void ZmqTransmitterSvc::transmit() const {
                   infoConnected(info) = true;
                   infoChecking(info) = false;
 
+                  auto& queue = infoMessages(info);
+                  auto credit = queue.size() < 1000 ? 667 : queue.size();
+                  std::get<5>(info) = boost::numeric_cast<size_t>(credit * 1.5);
+                  std::get<6>(info) = std::max(std::get<5>(info) / 10 + 1, 200ul);
+
                   // Send the connection that others should connect to as reply
                   string infoSvcOut = boost::regex_replace(entry.first, reInfoSvc,
                                                            string{"tcp://"} + Monitoring::hostname() + "$1");
@@ -744,28 +898,87 @@ void ZmqTransmitterSvc::transmit() const {
          }
       }
 
-      for (auto& info : infos) {
-         InfoTuple& infos = info.second;
-         auto& forwardType = info.first;
-         unsigned int hwm = forwarders[forwardType].first.hwm;
-         if (!infoConnected(infos) && !infoChecking(infos) && (infoMessages(infos).size() > (hwm >> 2))) {
-            infoChecking(infos) = true;
-            if (!checkRecreated(forwardType)) dataOut(infos) = forwarders[forwardType].second();
-            sendCheckMsg(dataOut(infos), jobInfo(infos));
-         }
+      // Send messages
+      if (items[2].revents & ZMQ_POLLIN) {
+         zmq().receive<string>(pub);
+         for (auto& info : infos) {
+            InfoTuple& infoTuple = info.second;
+            const auto& forwardType = info.first;
+
+            auto& nPub = std::get<7>(infoTuple);
+            ++nPub;
+
+            if (!infoConnected(infoTuple)
+                && !infoMessages(infoTuple).empty()) {
+               infoChecking(infoTuple) = true;
+               if (!checkRecreated(forwardType)
+                   && (nPub != 0) && (nPub % 5 == 0)) {
+
+                  if (UNLIKELY(msgLevel(MSG::DEBUG))) {
+                     debug() << "Recreating socket for forward type "
+                             << forwardType << endmsg;
+                  }
 
-         if (infoConnected(infos) && !infoMessages(infos).empty()) {
-            if (msgLevel(MSG::DEBUG) && infoMessages(infos).size() > 1)
-               debug() << "Sending " << infoMessages(infos).size() << " buffered messages." << endmsg;
-            zmq::socket_t& output = dataOut(infos);
-            for (auto& entry : infoMessages(infos)) {
-               sendMessage(output, entry);
+                  dataOut(infoTuple) = forwarders[forwardType].second();
+                  recreated[info.first] = true;
+               }
+               sendCheckMsg(dataOut(infoTuple), jobInfo(infoTuple));
             }
-            infoMessages(infos).clear();
-            if (forwarders[forwardType].first.checkEverySend) infoConnected(infos) = false;
+
+            if (infoConnected(infoTuple) && !infoMessages(infoTuple).empty()) {
+
+               auto& credit = std::get<5>(infoTuple);
+               auto batchSize = std::get<6>(infoTuple);
+
+               zmq::socket_t& output = dataOut(infoTuple);
+               auto& messages = infoMessages(infoTuple);
+
+               // Send either batchSize messages or all remaining if
+               // fewer than batchSize
+               // Send never more than the remaining credit
+               auto n = std::min(credit, std::min(batchSize, messages.size()));
+
+               if (UNLIKELY(msgLevel(MSG::DEBUG) && n > 0)) {
+                  debug() << "Sending " << n << " buffered messages for "
+                          << forwardType << endmsg;
+               }
+
+               for (size_t i = 0; i < n; ++i) {
+                  auto& message = messages.front();
+                  size_t nMsg = message.size();
+                  for (unsigned int i = 0; i < nMsg; ++i) {
+                     zmq().send(output, message[i], (i < (nMsg - 1)) ? zmq::SNDMORE : 0);
+                  }
+                  messages.pop_front();
+               }
+               credit -= n;
+
+               if (forwarders[forwardType].first.checkEverySend && nPub == 10) {
+                  infoConnected(infoTuple) = false;
+               }
+            }
+            if (nPub == 10) nPub = 0;
          }
       }
-   }
 
-   checkThread.join();
+      // Debug messages from check thread
+      if (items[3].revents & ZMQ_POLLIN) {
+         auto msg = zmq().receive<string>(dbg);
+         if (UNLIKELY(msgLevel(MSG::DEBUG))) {
+            debug() << "sendCheck: " << msg << endmsg;
+         }
+      }
+
+      // If an exception has occurred in one of the daughter threads, send the other one a terminate and then exit ourselves
+      if (pubException && !checkException) {
+         error() << "Publish exception was caught, exiting transmit." << endmsg;
+         zmq().send(check, Monitoring::s_Command, zmq::SNDMORE);
+         zmq().send(check, Monitoring::s_Terminate);
+         break;
+      } else if (checkException && !pubException) {
+         error() << "Check exception was caught, exiting transmit." << endmsg;
+         zmq().send(pub, Monitoring::s_Terminate);
+         break;
+      }
+   }
 }
diff --git a/Online/Hlt2Monitoring/src/component/ZmqTransmitterSvc.h b/Online/Hlt2Monitoring/src/component/ZmqTransmitterSvc.h
index bd3ccf240a0982c5aa2ae46c1d5ba16bf6342725..a025f0d9945d69e186ba69cf5e040d65e7710144 100644
--- a/Online/Hlt2Monitoring/src/component/ZmqTransmitterSvc.h
+++ b/Online/Hlt2Monitoring/src/component/ZmqTransmitterSvc.h
@@ -8,6 +8,8 @@
 #include <boost/optional.hpp>
 
 // Include files
+#include <GaudiKernel/IIncidentSvc.h>
+#include <GaudiKernel/IIncidentListener.h>
 #include <GaudiKernel/Service.h>
 
 // from Hlt2Monitoring
@@ -100,10 +102,17 @@ class GAUDI_API ZmqTransmitterSvc : public extends<Service, ITransmitterSvc> {
       return m_application;
    }
 
+   // Report the current value of the m_ok flag.
+   bool ok() const override {
+      return m_ok;
+   }
+
  private:
 
+   using Options = std::vector<std::pair<zmq::SocketOptions, int>>;
+
    std::string infoSvcCon(const std::string& dataCon) const {
-      if (dataCon.substr(0, 4) == "tcp") {
+      if (dataCon.substr(0, 3) == "tcp") {
          return std::string{"tcp://*:"} + std::to_string(m_infoPort);
       } else {
          return std::string{"ipc://"} + m_connectionPath + "/" + name() + "_" + std::to_string(::getpid());
@@ -115,26 +124,45 @@ class GAUDI_API ZmqTransmitterSvc : public extends<Service, ITransmitterSvc> {
    }
 
    void sendCheck(const std::string internalCon, const std::string debugCon) const;
+   void publish(const std::string internalCon) const;
 
    std::string internalCon(const std::string& type, unsigned int i = 0) const {
       return std::string{"inproc://"} + name() + "_" + type + "_" + std::to_string(i);
    }
 
+   // Create a PAIR socket with LINGER 0 and the given extra options, then bind it to con or connect it to con.
+   zmq::socket_t internalSocket(std::string con, bool bind = true, Options options = Options{}) const {
+      auto sock = zmq().socket(zmq::PAIR);
+      zmq::setsockopt(sock, zmq::LINGER, 0);
+      for (const auto& opt : options) zmq::setsockopt(sock, opt.first, opt.second);
+      if (bind) sock.bind(con.c_str());
+      else sock.connect(con.c_str());
+      return sock;
+   }
+
+
    void setup();
-   void transmit() const;
+   void transmitWrapper();
+   void transmit(zmq::socket_t& internal, zmq::socket_t& check,
+                 zmq::socket_t& pub,
+                 std::exception_ptr& pubException,
+                 std::exception_ptr& checkException) const;
 
    std::pair<std::string, std::string> m_application;
    std::string m_hostRegex;
    unsigned int m_infoPort;
    std::string m_connectionPath;
+   unsigned int m_maxMonRestart;
+   mutable std::atomic<bool> m_ok{false};
 
    SmartIF<IZeroMQSvc> m_zmqSvc;
    std::unique_ptr<zmq::socket_t> m_trigger;
    std::string m_triggerCon;
-   unsigned int m_internalConCounter;
+   unsigned int m_internalConCounter = 1;
 
    std::unique_ptr<zmq::socket_t> m_internal;
    std::unique_ptr<std::thread> m_thread;
 
+   std::exception_ptr m_transmitException = nullptr;
 };
 #endif // ZMQTRANSMITTERSVC_H
diff --git a/Online/Hlt2Monitoring/src/lib/HistoUtils.cpp b/Online/Hlt2Monitoring/src/lib/HistoUtils.cpp
index 8c8bf380c49c9688c179fa439463a40bb34d0f1f..7a1c87b61f95fc0aa16b389c520cf1896461589b 100644
--- a/Online/Hlt2Monitoring/src/lib/HistoUtils.cpp
+++ b/Online/Hlt2Monitoring/src/lib/HistoUtils.cpp
@@ -7,17 +7,41 @@
 // boost
 #include <boost/numeric/conversion/cast.hpp>
 
+#include <boost/iostreams/filter/bzip2.hpp>
+#include <boost/iostreams/filtering_stream.hpp>
+#include <boost/iostreams/device/file.hpp>
+#include <boost/iostreams/stream.hpp>
+
+#include <boost/filesystem.hpp>
+
+#include <boost/archive/text_iarchive.hpp>
+
+// range v3
+#include <range/v3/algorithm.hpp>
+#include <range/v3/view.hpp>
+
 // ROOT
 #include <THashList.h>
 #include <TObjString.h>
 
 // local
-#include "Hlt2Monitoring/HistoUtils.h"
+#include <Hlt2Monitoring/Types.h>
+#include <Hlt2Monitoring/HistoUtils.h>
+#include <Hlt2Monitoring/Histo1DDef.h>
+#include <Hlt2Monitoring/Histo2DDef.h>
+#include <Hlt2Monitoring/InfoUtils.h>
 
 namespace {
    using std::string;
    using std::tuple;
    using std::vector;
+   using std::ofstream;
+   using namespace ranges;
+   using boost::math::sign;
+   using boost::math::epsilon_difference;
+
+   namespace io = boost::iostreams;
+   namespace fs = boost::filesystem;
 }
 
 tuple<int, double, double, vector<double>> axisDefinition(const Gaudi::Axis& axis) {
@@ -60,3 +84,117 @@ vector<string> getLabels(const Gaudi::Axis& axis) {
    }
    return labels;
 }
+
+// Decide whether two binnings are the same. Judging by the names only, each side is
+// presumably (low edge, high edge, number of bins) - confirm against the callers.
+// The result is true when lb equals rb, corresponding edges have the same sign, and
+// no pair of corresponding edges has an epsilon_difference larger than 2.
+bool same_bins(double ll, double lh, int lb,
+               double rl, double rh, int rb) {
+   if (lb != rb) return false;
+
+   const bool signsDiffer = (sign(ll) != sign(rl)) || (sign(lh) != sign(rh));
+   if (signsDiffer) return false;
+
+   const bool edgesDiffer = (epsilon_difference(ll, rl) > 2) || (epsilon_difference(lh, rh) > 2);
+   return !edgesDiffer;
+}
+
+// Check whether two lists of labels are identical.
+//
+// Two lists are the same when they have the same number of entries and
+// every label equals the label at the same position in the other list.
+// std::vector's operator== performs exactly this comparison, and it does
+// so without building a temporary tuple of copied strings per element.
+bool same_labels(const std::vector<std::string>& ll,
+                 const std::vector<std::string>& rl) {
+   return ll == rl;
+}
+
+// Hash a Histo1DDef: title, then the binning (edges if variable, else range and bin count), then labels if set
+size_t Monitoring::hash_value(const Monitoring::Histo1DDef& def) {
+   std::size_t seed = 0;
+   boost::hash_combine(seed, def.title);
+   if (!def.variable) {
+      boost::hash_combine(seed, def.xlow);
+      boost::hash_combine(seed, def.xhigh);
+      boost::hash_combine(seed, def.xbins);
+   } else {
+      boost::hash_combine(seed, def.xedges);
+   }
+   if (def.labels) {
+      boost::hash_combine(seed, def.xlabels);
+   }
+   return seed;
+}
+
+// Hash a Histo2DDef: title, then the x binning, then the y binning, then both label sets if labels is set
+size_t Monitoring::hash_value(const Monitoring::Histo2DDef& def) {
+   std::size_t seed = 0;
+   boost::hash_combine(seed, def.title);
+   if (!def.xvariable) {
+      boost::hash_combine(seed, def.xlow);
+      boost::hash_combine(seed, def.xhigh);
+      boost::hash_combine(seed, def.xbins);
+   } else {
+      boost::hash_combine(seed, def.xedges);
+   }
+   if (!def.yvariable) {
+      boost::hash_combine(seed, def.ylow);
+      boost::hash_combine(seed, def.yhigh);
+      boost::hash_combine(seed, def.ybins);
+   } else {
+      boost::hash_combine(seed, def.yedges);
+   }
+   if (def.labels) {
+      boost::hash_combine(seed, def.xlabels);
+      boost::hash_combine(seed, def.ylabels);
+   }
+   return seed;
+}
+
+// Add a histogram under key; entries with the same content hash share a single HistoVariant.
+std::pair<Monitoring::HistoMap::const_iterator, bool>
+addHistogram(Monitoring::HistoMap& histograms,
+             const Monitoring::HistoKey& key, const string& type,
+             MonInfo::HistoVariant variant) {
+   const size_t hash = boost::hash<MonInfo::HistoVariant>{}(variant);
+   auto& byContent = histograms.get<MonInfo::ByContent>();
+   auto vit = byContent.find(hash);
+   std::shared_ptr<MonInfo::HistoVariant> shared;
+   if (vit != byContent.end()) {
+      shared = vit->cnt;  // content already stored: share it
+   } else {
+      shared = std::make_shared<MonInfo::HistoVariant>(std::move(variant));
+   }
+   // key and type are const references, so std::move on them was a no-op; pass them as they are.
+   return histograms.emplace(key, type, hash, std::move(shared));
+}
+
+// Load histogram maps from a bzip2-compressed boost text archive and add their entries to histograms.
+// Returns the number of maps read from the archive, or 0 if input_file does not exist.
+size_t loadHistoInfo(Monitoring::HistoMap& histograms, string input_file) {
+   const fs::path filename{input_file};
+   if (!fs::exists(filename)) return 0;
+   // The archive reads from the decompressing filter, which in turn reads from the file.
+   io::stream<io::file_source> input(filename.string(), std::ios_base::in | std::ios_base::binary);
+   io::filtering_istream filter;
+   filter.push(io::bzip2_decompressor());
+   filter.push(input);
+   boost::archive::text_iarchive ta(filter);
+
+   Monitoring::HistoMap tmp;
+   size_t n_read = 0;
+   while (!filter.eof()) {
+      try {
+         ta >> tmp;
+         ++n_read;
+      } catch (const boost::archive::archive_exception&) {
+         break;  // stop at the first map that cannot be deserialized
+      }
+      for (const auto& entry : tmp) {
+         addHistogram(histograms, entry.key, entry.type, entry.content());
+      }
+   }
+   return n_read;
+}
diff --git a/Online/Hlt2Monitoring/src/lib/Utilities.cpp b/Online/Hlt2Monitoring/src/lib/Utilities.cpp
index 4776eca4ee8c13638787e0e9a474c1093aff40ee..54ff57ae33cea49d8c6e61f423b7ca8063e012c3 100644
--- a/Online/Hlt2Monitoring/src/lib/Utilities.cpp
+++ b/Online/Hlt2Monitoring/src/lib/Utilities.cpp
@@ -1,23 +1,12 @@
-// Include files
 #include <iostream>
 #include <string>
 #include <map>
 
-// boost
 #include <boost/numeric/conversion/cast.hpp>
 #include <boost/range/iterator_range.hpp>
 #include <boost/lexical_cast.hpp>
-
-// ROOT
-#include <TDirectory.h>
-#include <TClass.h>
-#include <TFile.h>
-#include <TKey.h>
-
-// boost
 #include <boost/regex.hpp>
 
-// local
 #include "Hlt2Monitoring/Utilities.h"
 
 namespace {
@@ -42,16 +31,6 @@ namespace Gaudi {
 }
 #endif
 
-#ifdef STANDALONE
-IZeroMQSvc* zmqSvc() {
-   static std::unique_ptr<IZeroMQSvc> zmqSvc;
-   if (!zmqSvc) {
-      zmqSvc.reset(new IZeroMQSvc{});
-   }
-   return zmqSvc.get();
-}
-#endif
-
 //===============================================================================
 unsigned int Monitoring::sourceID(boost::regex regex, string host) {
    boost::smatch matches;
@@ -99,6 +78,10 @@ string Monitoring::hostname() {
    string hn;
    if (!gethostname(hname, sizeof(hname))) {
       hn = string{hname};
+      auto pos = hn.find('.');
+      if (pos != string::npos) {
+         hn = hn.substr(0, pos);
+      }
    }
    return hn;
 }
diff --git a/Online/Hlt2Monitoring/test/dump_info.cpp b/Online/Hlt2Monitoring/test/dump_info.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..34f2a2ebf128156ebee3d1660c97ac3109a86594
--- /dev/null
+++ b/Online/Hlt2Monitoring/test/dump_info.cpp
@@ -0,0 +1,530 @@
+#include <iostream>
+#include <vector>
+#include <string>
+#include <csignal>
+#include <fstream>
+#include <random>
+
+#include <boost/format.hpp>
+#include <boost/optional.hpp>
+#include <boost/functional/hash.hpp>
+
+#include <boost/archive/text_oarchive.hpp>
+
+#include <boost/serialization/shared_ptr.hpp>
+#include <boost/serialization/serialization.hpp>
+#include <boost/serialization/set.hpp>
+#include <boost/serialization/map.hpp>
+#include <boost/serialization/string.hpp>
+#include <boost/serialization/vector.hpp>
+#include <boost/serialization/shared_ptr.hpp>
+#include <boost/serialization/variant.hpp>
+
+#include <boost/iostreams/filter/bzip2.hpp>
+#include <boost/iostreams/filtering_stream.hpp>
+#include <boost/iostreams/device/file.hpp>
+#include <boost/iostreams/stream.hpp>
+
+#include <boost/program_options.hpp>
+
+#include <boost/filesystem.hpp>
+
+#include <zmq/zmq.hpp>
+#include <ZeroMQ/functions.h>
+#include <ZeroMQ/IZeroMQSvc.h>
+
+#include <Hlt2Monitoring/Types.h>
+#include <Hlt2Monitoring/Histo1DDef.h>
+#include <Hlt2Monitoring/Histo2DDef.h>
+#include <Hlt2Monitoring/HistoUtils.h>
+#include <Hlt2Monitoring/InfoUtils.h>
+#include <Hlt2Monitoring/Utilities.h>
+
+namespace {
+   using std::cout;
+   using std::endl;
+   using std::vector;
+   using std::string;
+   using std::ofstream;
+
+   using std::array;
+   using std::set;
+   using std::vector;
+   using std::string;
+   using std::to_string;
+   using std::unique_ptr;
+   using std::make_pair;
+   using std::make_tuple;
+   using std::pair;
+   using std::map;
+   using std::unordered_set;
+
+   using boost::optional;
+
+   using Monitoring::HistoKey;
+   using Monitoring::HistoKeys;
+   using Monitoring::HistoPub;
+   using Monitoring::HistoMap;
+   using Monitoring::Histo1DDef;
+   using Monitoring::Histo2DDef;
+
+   using MonInfo::ByKey;
+   using MonInfo::ByContent;
+   using MonInfo::HistoVariant;
+   using MonInfo::HistoEntry;
+
+   namespace io = boost::iostreams;
+   namespace fs = boost::filesystem;
+   namespace po = boost::program_options;
+
+   volatile std::sig_atomic_t interrupted = 0;
+}
+
+// Access the ZeroMQ service used by this program.
+// A single IZeroMQSvc is created on the first call and reused afterwards.
+IZeroMQSvc& zmqSvc() {
+   static std::unique_ptr<IZeroMQSvc> instance;
+   if (instance == nullptr) instance = std::make_unique<IZeroMQSvc>();
+   return *instance;
+}
+
+// Installed for SIGINT in main(): only sets the flag that the sync loops check
+void signal_handler(int) {
+   interrupted = 1;
+}
+
+// Sync in the old way
+optional<bool> syncOld(Monitoring::HistoMap& histograms, const HistoKeys& written, std::string connection);
+
+// Sync in the new way
+optional<bool> syncNew(Monitoring::HistoMap& histograms, const HistoKeys& written, std::string connection);
+
+// Decode histo information
+bool decodeHistoInfo(const vector<zmq::message_t>& msgs, Monitoring::HistoMap& histograms);
+
+// Collect histogram info from monitoring hosts and write it to a bzip2-compressed text archive.
+// Info found in the input files, and in the output file if it already exists, is loaded first.
+int main(int ac, char* av[]) {
+   string method;
+   unsigned int port;
+
+   // Declare the supported options.
+   po::options_description desc("Allowed options");
+   desc.add_options()
+      ("help", "produce help message")
+      ("method,m", po::value<string>(&method)->default_value("new"), "sync method: old or new")
+      ("input-file,i", po::value<vector<string>>(), "input file")
+      ("output-file,o", po::value<string>(), "output file")
+      ("host,h", po::value<vector<string>>(), "host to sync with")
+      ("port,p", po::value<unsigned int>(&port)->default_value(31352), "port to connect to")
+      ("node,n", po::value<vector<string>>(), "node to sync with");
+
+   po::positional_options_description p;
+   p.add("output-file", 1);
+
+   po::variables_map vm;
+   po::store(po::command_line_parser(ac, av).
+             options(desc).positional(p).run(), vm);
+   po::notify(vm);
+
+   if (vm.count("help")) {
+      cout << desc << "\n";
+      return 1;
+   }
+
+   if (method != "old" && method != "new") {
+      cout << "method must be either old or new" << endl;
+      return 1;
+   }
+
+   // as<string>() below throws when the option is absent, so require it explicitly
+   if (!vm.count("output-file")) {
+      cout << "an output file is required\n" << desc << "\n";
+      return 1;
+   }
+
+   vector<string> input_files;
+   if (vm.count("input-file")) {
+      input_files = vm["input-file"].as< vector<string>>();
+   }
+
+   auto output_file = vm["output-file"].as<string>();
+   if (fs::exists(fs::path{output_file})) {
+      input_files.push_back(output_file);
+   }
+
+   auto toConnection = [port](const string& host) -> string {
+      return string{"tcp://"} + host + ":" + to_string(port);
+   };
+
+   set<string> syncConnections;
+   if (vm.count("host")) {
+      for (const auto& host : vm["host"].as< vector<string>>()) {
+         // A value without a scheme is taken as a host name; a full endpoint is used as given
+         syncConnections.emplace(host.find("://") == string::npos ? toConnection(host) : host);
+      }
+   } else {
+      // Build set of all subfarms
+      vector<string> subfarms;
+      for (auto rack : string{"abcdef"}) {
+         for (int row = 1; row < 11; ++row) {
+            subfarms.emplace_back((boost::format{"hlt%s%02d"} % rack % row).str());
+         }
+      }
+
+      std::mt19937 gen{std::hash<string>{}("dump_info")};
+
+      // Ping randomly picked subfarms until 5 replied or none are left; untried ones stay in subfarms[0, remaining)
+      size_t remaining = subfarms.size();
+      while (syncConnections.size() < 5 && remaining > 0) {
+         size_t s = remaining - 1;
+         auto it = subfarms.begin() + std::uniform_int_distribution<size_t>{0, s ? s - 1 : 0}(gen);
+         auto con = toConnection(*it);
+         zmq::socket_t ping = zmqSvc().socket(zmq::REQ);
+         zmq::setsockopt(ping, zmq::LINGER, 0);
+         zmq::setsockopt(ping, zmq::RCVTIMEO, 100);
+         ping.connect(con.c_str());
+         zmqSvc().send(ping, Monitoring::s_Ping);
+         string r;
+         zmq::pollitem_t items[] = {
+            {ping, 0, ZMQ_POLLIN, 0}
+         };
+         auto n = zmq::poll(&items[0], 1, 500);
+         if (items[0].revents & ZMQ_POLLIN) {
+            try {
+               r = zmqSvc().receive<string>(ping);
+            } catch (const ZMQ::TimeOutException&) {
+            }
+         }
+         if (n == 0 || r.empty()) {
+            cout << "no reply from " << con << endl;
+         } else {
+            syncConnections.emplace(con);
+         }
+         std::swap(*it, subfarms[remaining - 1]);
+         --remaining;
+      }
+   }
+
+   HistoKeys written;
+   Monitoring::HistoMap histograms;
+
+   auto n_read = 0;
+   for (const auto& input_file : input_files) {
+      auto n = loadHistoInfo(histograms, input_file);
+      if (n) {
+         ++n_read;
+         cout << "Read " << histograms.size() << " histograms from " << input_file << endl;
+      }
+   }
+   cout << "Read " << n_read << " sets of histograms." << endl;
+
+   io::stream<io::file_sink> output(output_file, ofstream::out | ofstream::binary);
+   io::filtering_stream<io::output> filter;
+   filter.push(io::bzip2_compressor());
+   filter.push(output);
+   boost::archive::text_oarchive ta(filter);
+
+   size_t tries = 100;
+   bool more = true;
+   std::signal(SIGINT, signal_handler);
+
+   cout << "Synchronising with:";
+   for (const auto& c : syncConnections) {
+      cout << " " << c;
+   }
+   cout << endl;
+
+   auto it = begin(syncConnections);
+   while (more && it != end(syncConnections) && interrupted == 0) {
+      auto connection = *it;
+      cout << "Synchronising with: " << connection << endl;
+      while (more && interrupted == 0) {
+         optional<bool> r;
+         try {
+            if (method == "old") {
+               r = syncOld(histograms, written, connection);
+            } else {
+               r = syncNew(histograms, written, connection);
+            }
+         } catch (const zmq::error_t& e) {
+            cout << "ZeroMQ error while synchronising with " << connection << ": " << e.what() << endl;
+            more = false;
+            break;
+         }
+         if (!r) {
+            more = true;
+            break;
+         } else {
+            more = *r;
+         }
+      }
+      ++it;
+      if (it == end(syncConnections) && tries > 0) {
+         it = begin(syncConnections);
+         --tries;
+      }
+   }
+
+   if (!histograms.empty()) {
+      ta << histograms;
+      cout << "Total: " << histograms.size() << endl;
+   }
+}
+
+// Add a histogram under key; entries with the same content hash share a single HistoVariant.
+// NOTE(review): HistoUtils.cpp defines a function with this same signature - confirm both are needed.
+std::pair<HistoMap::const_iterator, bool> addHistogram(HistoMap& histograms,
+                                                       const HistoKey& key,
+                                                       const string& type,
+                                                       HistoVariant variant) {
+   const size_t hash = boost::hash<MonInfo::HistoVariant>{}(variant);
+   auto vit = histograms.get<ByContent>().find(hash);
+   std::shared_ptr<HistoVariant> shared;
+   if (vit != histograms.get<ByContent>().end()) {
+      shared = vit->cnt;  // content already stored: share it
+   } else {
+      shared = std::make_shared<HistoVariant>(std::move(variant));
+   }
+   return histograms.emplace(key, type, hash, std::move(shared));  // key/type are const: copied
+}
+
+optional<bool> syncOld(Monitoring::HistoMap& histograms,
+                              const HistoKeys& written,
+                              std::string connection)
+{
+   // One sync round in the old reply format; returns an empty optional on timeout or bad reply
+   vector<string> what = {Monitoring::s_HistoInfo};
+
+   HistoKeys histoKeys;
+   std::for_each(begin(histograms), end(histograms),
+                 [&histoKeys](const HistoEntry& entry) {
+                    histoKeys.emplace(entry.key);
+                 });
+
+   std::for_each(begin(written), end(written),
+                 [&histoKeys](const HistoKey& key) {
+                    histoKeys.emplace(key);
+                 });
+
+   // Connect output request socket
+   zmq::socket_t out = zmqSvc().socket(zmq::REQ);
+   zmq::setsockopt(out, zmq::LINGER, 0);
+   zmq::setsockopt(out, zmq::RCVTIMEO, 1000);
+   out.connect(connection.c_str());
+
+   optional<bool> more;
+
+   // Request synchronisation
+   zmqSvc().send(out, Monitoring::s_Sync, zmq::SNDMORE);
+
+   // Send what we want to synchronise in the right order
+   zmqSvc().send(out, what, zmq::SNDMORE);
+
+   // Indicate we want all runs, by sending an empty set, and
+   // the histo keys we have.
+   zmqSvc().send(out, std::unordered_set<Monitoring::RunNumber>{}, zmq::SNDMORE);
+   zmqSvc().send(out, histoKeys);
+
+   optional<string> rep;
+   zmq::pollitem_t items[] = {
+      {out, 0, ZMQ_POLLIN, 0}
+   };
+
+   zmq::poll(&items[0], 1, 5000);
+   if (items[0].revents & ZMQ_POLLIN) {
+      rep = zmqSvc().receive<std::string>(out);
+   }
+
+   if (!rep) {
+      cout << "Sync request reply timed out." << endl;
+      return more;
+   } else if (*rep != "INCOMING") {
+      cout << "Bad reply to sync request: " << *rep << endl;
+      return more;
+   }
+
+   std::unordered_map<Monitoring::RunNumber, size_t> hpr;  // number of entries received per run
+
+   // Old typedef: one (RunNumber, HistId, type, info_string) tuple per histogram
+   using HistoPub = std::vector<std::tuple<Monitoring::RunNumber, Monitoring::HistId,
+                                           std::string, std::string>>;
+
+   // Histograms, followed by the flag that is handed back to the caller
+   auto histos = zmqSvc().receive<HistoPub>(out);
+   more = zmqSvc().receive<bool>(out);
+   for (const auto& entry : histos) {
+      auto run = std::get<0>(entry);
+      auto histID = std::get<1>(entry);
+      hpr[run]++;
+
+      // Update known keys with received info
+      histoKeys.emplace(run, histID);
+
+      // Entry consists of (RunNumber, HistId, type, info_string),
+      // where the info string needs to be converted to a message.
+      vector<zmq::message_t> msgs;
+      msgs.reserve(5);
+      msgs.emplace_back(zmqSvc().encode(Monitoring::s_HistoInfo));
+      msgs.emplace_back(zmqSvc().encode(run));
+      msgs.emplace_back(zmqSvc().encode(histID));
+      msgs.emplace_back(zmqSvc().encode(std::get<2>(entry)));
+
+      const auto& infoString = std::get<3>(entry);
+      zmq::message_t msg{infoString.size()};
+      std::copy_n(begin(infoString), infoString.size(), static_cast<char*>(msg.data()));
+      msgs.emplace_back(std::move(msg));
+      decodeHistoInfo(msgs, histograms);
+   }
+
+   for (const auto& entry : hpr) {
+      cout << "Decoded " << std::right << std::setw(6) << to_string(entry.second)
+           << " histograms for run " << entry.first << endl;
+   }
+
+   return more;
+}
+
+optional<bool> syncNew(Monitoring::HistoMap& histograms,
+                       const HistoKeys& written,
+                       std::string connection)
+{
+   // One sync round in the new reply format; returns an empty optional on timeout or bad reply
+   vector<string> what = {Monitoring::s_HistoInfo};
+
+   HistoKeys histoKeys;
+   std::for_each(begin(histograms), end(histograms),
+                 [&histoKeys](const HistoEntry& entry) {
+                    histoKeys.emplace(entry.key);
+                 });
+
+   // The keys passed in through written are sent along as well
+   std::for_each(begin(written), end(written),
+                 [&histoKeys](const HistoKey& key) {
+                    histoKeys.emplace(key);
+                 });
+
+   // Connect output request socket
+   zmq::socket_t out = zmqSvc().socket(zmq::REQ);
+   zmq::setsockopt(out, zmq::LINGER, 0);
+   zmq::setsockopt(out, zmq::RCVTIMEO, 100);
+   out.connect(connection.c_str());
+
+   optional<bool> more;
+
+   // Request synchronisation
+   zmqSvc().send(out, Monitoring::s_Sync, zmq::SNDMORE);
+
+   // Send what we want to synchronise in the right order
+   zmqSvc().send(out, what, zmq::SNDMORE);
+
+   // Indicate we want all runs, by sending an empty set, and
+   // the histo keys we have.
+   zmqSvc().send(out, std::unordered_set<Monitoring::RunNumber>{}, zmq::SNDMORE);
+   zmqSvc().send(out, histoKeys);
+
+   optional<string> rep;
+   zmq::pollitem_t items[] = {
+      {out, 0, ZMQ_POLLIN, 0}
+   };
+
+   zmq::poll(&items[0], 1, 1000);
+   if (items[0].revents & ZMQ_POLLIN) {
+      rep = zmqSvc().receive<std::string>(out);
+   }
+
+   if (!rep) {
+      cout << "Sync request reply timed out." << endl;
+      return more;
+   } else if (*rep != "INCOMING") {
+      cout << "Bad reply to sync request: " << *rep << endl;
+      return more;
+   }
+
+   std::unordered_map<Monitoring::RunNumber, size_t> hpr;  // number of keys received per run
+
+   // Histograms
+   auto histos = zmqSvc().receive<HistoPub>(out);
+   more = zmqSvc().receive<bool>(out);
+   for (const auto& entry : histos) {
+      const auto& type = std::get<0>(entry);
+      const auto& info = std::get<1>(entry);
+      const auto& keys = std::get<2>(entry);
+
+      // Entry consists of (type, info_string, keys). The info string is converted to a
+      // message once; for every further key only the two key parts are replaced.
+      vector<zmq::message_t> msgs;
+      msgs.reserve(5);
+
+      for (const auto& key : keys) {
+         if (msgs.empty()) {
+            msgs.emplace_back(zmqSvc().encode(Monitoring::s_HistoInfo));
+            msgs.emplace_back(zmqSvc().encode(key.first));
+            msgs.emplace_back(zmqSvc().encode(key.second));
+            msgs.emplace_back(zmqSvc().encode(type));
+
+            zmq::message_t msg{info.size()};
+            std::copy_n(begin(info), info.size(), static_cast<char*>(msg.data()));
+            msgs.emplace_back(std::move(msg));
+         } else {
+            msgs[1] = zmqSvc().encode(key.first);
+            msgs[2] = zmqSvc().encode(key.second);
+         }
+         // Update known keys with received info
+         histoKeys.emplace(key.first, key.second);
+
+         hpr[key.first]++;
+         decodeHistoInfo(msgs, histograms);
+      }
+   }
+
+   for (const auto& entry : hpr) {
+      cout << "Decoded " << std::right << std::setw(6) << to_string(entry.second)
+           << " histograms for run " << entry.first << endl;
+   }
+
+   if (more && *more) {
+      cout << "Syncing again with " << connection
+           << " as there is more info." << endl;
+   }
+   return more;
+}
+
+//===============================================================================
+// Decode a 5-part histogram info message (msgs[1] run, msgs[2] id, msgs[3] type, msgs[4]
+// type-specific payload) and add it to histograms unless its (run, id) key is already present.
+// Returns false for a wrong number of parts or a known key, true otherwise - also when the
+// type is unknown and nothing is added.
+bool decodeHistoInfo(const vector<zmq::message_t>& msgs, Monitoring::HistoMap& histograms)
+{
+   if (msgs.size() != 5) {
+      return false;
+   }
+
+   const auto run = zmqSvc().decode<Monitoring::RunNumber>(msgs[1]);
+   const auto id = zmqSvc().decode<Monitoring::HistId>(msgs[2]);
+   const pair<Monitoring::RunNumber, Monitoring::HistId> key{run, id};
+   auto type = zmqSvc().decode<std::string>(msgs[3]);
+
+   if (histograms.count(key)) {
+      return false;
+   }
+
+   // New histogram: decode the payload according to its type
+   optional<HistoVariant> variant;
+   if (type == Monitoring::s_Rate) {
+      variant = zmqSvc().decode<string>(msgs[4]);
+   } else if (type == Monitoring::s_Histo1D) {
+      variant = zmqSvc().decode<Histo1DDef>(msgs[4]);
+   } else if (type == Monitoring::s_Histo2D) {
+      variant = zmqSvc().decode<Histo2DDef>(msgs[4]);
+   } else {
+      cout << "Unkown type of histogram info: " << type
+           << " for histogram with ID: " << run << " " << id << endl;
+   }
+
+   if (variant) {
+      addHistogram(histograms, key, type, std::move(*variant));
+   }
+   return true;
+}
diff --git a/Online/Hlt2Monitoring/test/test_registrar b/Online/Hlt2Monitoring/test/test_registrar
deleted file mode 100755
index d00a534b7663cd8ad2d70110594d20a5148a27bb..0000000000000000000000000000000000000000
Binary files a/Online/Hlt2Monitoring/test/test_registrar and /dev/null differ
diff --git a/Online/Hlt2Monitoring/test/test_registrar.cpp b/Online/Hlt2Monitoring/test/test_registrar.cpp
index 9d1289aac1fbf61d28a7701ac8ee0fd483329144..b44138ae56286c5caabb4e75180fd20aede50734 100644
--- a/Online/Hlt2Monitoring/test/test_registrar.cpp
+++ b/Online/Hlt2Monitoring/test/test_registrar.cpp
@@ -1,9 +1,13 @@
 #include <string>
 #include <iostream>
+#include <thread>
 
 #include <boost/optional.hpp>
 #include <boost/lexical_cast.hpp>
+#include <boost/program_options.hpp>
+
 #include <ZeroMQ/IZeroMQSvc.h>
+#include <Hlt2Monitoring/Utilities.h>
 
 #include <zmq/zmq.hpp>
 
@@ -15,23 +19,32 @@ namespace {
    using std::make_unique;
 
    using boost::lexical_cast;
+
+   namespace po = boost::program_options;
 }
 
-int main() {
+string g_hostname;
 
-   IZeroMQSvc zmqSvc{};
+const IZeroMQSvc& zmqSvc() {
+   static std::unique_ptr<IZeroMQSvc> svc;
+   if (!svc) {
+      svc = std::make_unique<IZeroMQSvc>();
+   }
+   return *svc;
+}
 
-   std::string hostname = "hltperf-quanta01-e52630v4";
-   std::string regCon = "tcp://" + hostname + ":31360";
-   // std::string regCon = "ipc:///tmp/test_registrar";
+//===============================================================================
+boost::optional<std::pair<unsigned int, unsigned int>>
+registerPublisher(std::string regCon)
+{
 
    boost::optional<std::pair<unsigned int, unsigned int>> r;
-   
+
    zmq::pollitem_t items[1];
    unsigned int tries = 0;
 
-   auto makeReg = [&zmqSvc, &items, &tries, &regCon] {
-      auto reg = make_unique<zmq::socket_t>(zmqSvc.context(), zmq::REQ);
+   auto makeReg = [&items, &tries, &regCon] {
+      auto reg = make_unique<zmq::socket_t>(zmqSvc().context(), zmq::REQ);
       items[0] = {reg->operator void*(), 0, zmq::POLLIN, 0};
       zmq::setsockopt(*reg, zmq::LINGER, 0);
       try {
@@ -50,9 +63,9 @@ int main() {
 
       if (!reg) break;
 
-      zmqSvc.send(*reg, "REGISTER", zmq::SNDMORE);
-      zmqSvc.send(*reg, hostname, zmq::SNDMORE);
-      zmqSvc.send(*reg, "TEST");
+      zmqSvc().send(*reg, "REGISTER", zmq::SNDMORE);
+      zmqSvc().send(*reg, g_hostname, zmq::SNDMORE);
+      zmqSvc().send(*reg, "TEST");
 
       zmq::poll(&items[0], 1, 1000);
 
@@ -60,7 +73,7 @@ int main() {
          // use string here to allow easier interop with possible python
          // remote end.
          string mp, mpp;
-         auto msg = zmqSvc.receive<string>(*reg);
+         auto msg = zmqSvc().receive<string>(*reg);
          if (msg == "TAKEN") {
             cout << "A service publishing files with name " << "TEST"
                  << " already exists, not publishing files." << endl;
@@ -68,8 +81,8 @@ int main() {
          }
 
          try {
-            mp = zmqSvc.receive<string>(*reg);
-            mpp = zmqSvc.receive<string>(*reg);
+            mp = zmqSvc().receive<string>(*reg);
+            mpp = zmqSvc().receive<string>(*reg);
             r = make_pair(lexical_cast<unsigned int>(mp), lexical_cast<unsigned int>(mpp));
          } catch (boost::bad_lexical_cast) {
             cout << "Got unexpected reply from registar: " << mp << " " << mpp << endl;
@@ -82,4 +95,163 @@ int main() {
    if (r) {
       cout << r->first << " " << r->second << endl;
    }
+   return r;
+}
+
+//===============================================================================
+void registrar(std::string regCon, std::string con)
+{
+   auto internal = zmqSvc().socket(zmq::PAIR);
+   zmq::setsockopt(internal, zmq::LINGER, 0);
+   internal.connect(con.c_str());
+
+   std::vector<zmq::pollitem_t> items(2);
+   items[0] = {internal, 0, zmq::POLLIN, 0};
+
+   auto makePing = [&items, regCon] {
+      auto ping = zmqSvc().socket(zmq::REQ);
+      zmq::setsockopt(ping, zmq::LINGER, 0);
+      ping.connect(regCon.c_str());
+      items[1] = {ping, 0, zmq::POLLIN, 0};
+      return ping;
+   };
+
+   boost::optional<zmq::socket_t> ping;
+
+   bool sentPing = false;
+   while (true) {
+      int timeo = sentPing ? 500 : 10 * 1000;
+      zmq::poll(&items[0], ping ? 2 : 1, timeo);
+
+      if (items[0].revents & zmq::POLLIN) {
+         auto cmd = zmqSvc().receive<std::string>(internal);
+         if (cmd == Monitoring::s_Terminate) {
+            break;
+         } else {
+            cout << "registar: received bad command message " << cmd << endl;
+         }
+      }
+
+      if (!ping) {
+         // attempt to register
+         auto r = registerPublisher(regCon);
+         if (r) {
+            zmqSvc().send(internal, r->first);
+            zmqSvc().send(internal, r->second);
+            ping = makePing();
+         }
+      } else if (sentPing) {
+         sentPing = false;
+         if (items[1].revents & zmq::POLLIN) {
+            auto msg = zmqSvc().receive<string>(*ping);
+            if (msg != Monitoring::s_Pong) {
+               cout << "Bad reply from ping to registrar: " << msg << endl;
+            } else {
+               auto app = zmqSvc().receive<string>(*ping);
+               cout << "Received reply to ping: " << app << endl;
+            }
+         } else {
+            ping.reset();
+            cout << "No reply to ping." << endl;
+         }
+      } else {
+         // send a ping message
+         sentPing = true;
+         if (!ping) {
+            ping = makePing();
+         }
+         zmqSvc().send(*ping, Monitoring::s_Ping);
+         cout << "Sent ping" << endl;
+      }
+   }
+}
+
+int main(int ac, char* av[]) {
+
+   // We need this to send to the registrar we are testing, so it
+   // knows where to find us.
+   g_hostname = Monitoring::hostname();
+
+   string regCon;
+
+   // Declare the supported options.
+   po::options_description desc("Allowed options");
+   desc.add_options()
+      ("help", "produce help message")
+      ("connection,c", po::value<string>(&regCon)->default_value("tcp://" + g_hostname + ":31360"),
+       "connection to registrar.");
+
+   po::variables_map vm;
+   po::store(po::command_line_parser(ac, av).
+             options(desc).run(), vm);
+   po::notify(vm);
+
+   if (vm.count("help")) {
+      cout << desc << "\n";
+      return 1;
+   }
+
+   string intRegCon = "inproc://registrar";
+   auto reg = zmqSvc().socket(zmq::PAIR);
+   zmq::setsockopt(reg, zmq::LINGER, 0);
+   reg.bind(intRegCon.c_str());
+   std::thread registrarThread{[regCon, intRegCon]{ registrar(regCon, intRegCon); }};
+
+   std::vector<zmq::pollitem_t> items;
+   items.push_back({reg, 0, zmq::POLLIN, 0});
+
+   boost::optional<zmq::socket_t> ping;
+   unsigned int pingPort = 0;
+   string pingCon;
+
+   size_t n = 0;
+
+   while (n < 10) {
+      zmq::poll(&items[0], items.size(), -1);
+
+      if (items[0].revents & zmq::POLLIN) {
+         auto pubPort = zmqSvc().receive<unsigned int>(reg);
+         auto pp = zmqSvc().receive<unsigned int>(reg);
+
+         cout << "received publisher ports: " << pubPort << " " << pp << endl;
+
+         if (pp != pingPort) {
+            if (!pingCon.empty() ) {
+               ping->disconnect(pingCon.c_str());
+            }
+
+            pingCon = "tcp://*:" + std::to_string(pp);
+
+            // Setup ping connection
+            ping = zmqSvc().socket(zmq::REP);
+            zmq::setsockopt(*ping, zmq::LINGER, 0);
+            ping->bind(pingCon.c_str());
+            if (pingPort != 0) {
+               items[1] = {*ping, 0, zmq::POLLIN, 0};
+            } else {
+               items.push_back({*ping, 0, zmq::POLLIN, 0});
+            }
+            pingPort = pp;
+         }
+      }
+
+      // Reply to pings from registration server to see if we're
+      // alive.
+      if (ping && items[1].revents & zmq::POLLIN) {
+         auto msg = zmqSvc().receive<string>(*ping);
+         cout << "Received message " << msg << " on ping socket." << endl;
+         if (msg == Monitoring::s_Ping) {
+            zmqSvc().send(*ping, Monitoring::s_Pong, zmq::SNDMORE);
+            zmqSvc().send(*ping, "TEST");
+            ++n;
+         } else {
+            cout << "Received unknown message on ping socket: " << msg << endl;
+         }
+      }
+      if (n == 10) {
+         zmqSvc().send(reg, Monitoring::s_Terminate);
+      }
+   }
+
+   registrarThread.join();
 }
diff --git a/Online/Monitoring/python/Monitoring/Communicator.py b/Online/Monitoring/python/Monitoring/Communicator.py
index 63f2c0f08f24236cc2d3a4c8b5d5fa34385af7e1..0c6e4db6e44a19267bbc039644be1152341fbae2 100644
--- a/Online/Monitoring/python/Monitoring/Communicator.py
+++ b/Online/Monitoring/python/Monitoring/Communicator.py
@@ -1,6 +1,7 @@
 import os
 import pydim
 import socket
+import errno
 
 
 class State(object):
@@ -58,8 +59,16 @@ class Communicator(object):
     def status(self):
         return self.__status
 
+    def pipe(self):
+        return self.__process_end
+
     def get_command(self):
-        return self.__process_end.recv()
+        while True:
+            try:
+                return self.__process_end.recv()
+            except IOError as e:
+                if e.errno != errno.EINTR:
+                    raise
 
     def set_status(self, status):
         self.__status = status
diff --git a/Online/Monitoring/python/Monitoring/DimMonitor.py b/Online/Monitoring/python/Monitoring/DimMonitor.py
index ee94e6259825f6026390e898c1d64a0906eaf32c..13dbfe1c5470baf8d904114e00e9b4d173075aa0 100644
--- a/Online/Monitoring/python/Monitoring/DimMonitor.py
+++ b/Online/Monitoring/python/Monitoring/DimMonitor.py
@@ -113,7 +113,7 @@ class DimMonitor(object):
 class DimForwarder(object):
 
     def __init__(self, service, t=None, dim_dns_node="mona08",
-                 connection_path="/tmp", context=None):
+                 connection_path="/tmp", context=None, pipe=None):
         self.__dns_node = dim_dns_node
         self.__connection_path = connection_path
         if not os.path.exists(connection_path):
@@ -123,6 +123,7 @@ class DimForwarder(object):
         self.__type = t if t is not None else "C"
         self.__context = context
         self.__internal = None
+        self.__pipe = pipe  # optional start/ready handshake pipe
 
     def __callback(self, tag, val):
         # the callback is called from another thread, so we need a socket to
@@ -134,6 +135,9 @@ class DimForwarder(object):
             self.__internal.send(val)
 
     def __call__(self):
+        if self.__pipe:
+            self.__pipe.recv()
+
         if self.__context is None:
             self.__context = zmq.Context()
 
@@ -154,12 +158,14 @@ class DimForwarder(object):
         for s in (internal, control):
             poller.register(s, zmq.POLLIN)
 
+        if self.__pipe:
+            self.__pipe.send("ready")
+
         done = False
         while not done:
             socks = dict(poller.poll())
             if control in socks and socks[control] == zmq.POLLIN:
-                message = control.recv()
-                print "Recieved control command: %s" % message
+                message = control.recv_string()
                 if message == "TERMINATE":
                     done = True
 
@@ -175,6 +181,7 @@ class DimForwarder(object):
     def control_connection(self):
         return self.service_connection() + "_control"
 
+
 __all__ = (DimMonitor,
            DimForwarder,
            get_dns_node,
diff --git a/Online/Monitoring/python/Monitoring/DiskMonitor.py b/Online/Monitoring/python/Monitoring/DiskMonitor.py
new file mode 100644
index 0000000000000000000000000000000000000000..929aa56e94de53cb47e0c444ecad5b628aa5a48a
--- /dev/null
+++ b/Online/Monitoring/python/Monitoring/DiskMonitor.py
@@ -0,0 +1,195 @@
+import re
+import socket
+from Monitoring.DimMonitor import DimForwarder
+from Monitoring.decorators import zmq
+from Hlt2Monitoring.Utilities import node_type
+from Configurables import MonitoringJob
+from multiprocessing import Process, Pipe
+from ROOT import TH1D
+from ROOT import kOrange, kGreen, kBlue, kMagenta
+from Communicator import (Communicator, State)
+
+
+def nodes_info(val):
+    node_re = re.compile(
+        r"hlt(?P<rack>[a-f])(?P<row>[0-9]{2})(?P<node>[0-9]{2})")
+    info = {}
+    s = val.split('|')
+    if not s:
+        return info
+    for entry in s:
+        if not entry:
+            continue
+        if not entry[0].isalnum():
+            break
+        host, numbers = entry.split(' ')
+        m = node_re.match(host)
+        if not m:
+            continue
+        n = numbers.split('/')
+        total, free, nd, gd = float(n[0]), float(n[1]), int(n[2]), int(n[3])
+        info[(host, m.group('rack'), int(m.group('row')),
+              int(m.group('node')))] = (total, free, nd, gd)
+    return info
+
+
+plots = {0: ("slow", kMagenta - 3),
+         1: ("medium", kOrange + 5),
+         2: ("fast", kBlue - 3),
+         3: ("faster", kGreen - 3)}
+histos = {}
+
+
+def run(outputLevel=3, auto=True, **kwargs):
+
+    forward_pipe, start_pipe = Pipe()
+    filling_svc = DimForwarder('FarmStatus/StorageStatus',
+                               dim_dns_node="ecs03",
+                               connection_path="/tmp/DiskMonitor",
+                               pipe=start_pipe)
+    forwarder = Process(target=filling_svc)
+    forwarder.daemon = True
+    # fork must happen here, so start now
+    forwarder.start()
+
+    # Start the communicator
+    com = Communicator('DISKMONITOR')
+
+    mj = MonitoringJob(**kwargs)
+    mj.JobName = "DiskFilling"
+
+    # FSM loop
+    state = State.NOT_READY
+    com.set_status(state)
+
+    gaudi = None
+
+    print '[DEBUG] DiskMonitor launched'
+
+    poller = zmq.Poller()
+    command_pipe = com.pipe()
+    poller.register(command_pipe, zmq.POLLIN)
+
+    # socket placeholders
+    input_socket = None
+    control = None
+    zmqSvc = None
+
+    while True:
+
+        if not (auto and state in (State.NOT_READY, State.READY)):
+            timeout = -1
+        else:
+            timeout = 0
+
+        rep = dict(poller.poll(timeout))
+        command = None
+
+        # Handle commands
+        if command_pipe in rep and rep[command_pipe] == zmq.POLLIN:
+            command = com.get_command()
+            print 'Got command %s' % command
+
+        if ((command and command.startswith('configure') or auto)
+                and state == State.NOT_READY):
+
+            from Monitoring.MonitoringJob import initialize
+            initialize()
+
+            for nt, (name, color) in plots.iteritems():
+                histo = TH1D(name, name, 101, -0.05, 1.05)
+                histo.SetLineColor(color)
+                histo.SetLineWidth(2)
+                histos[nt] = histo
+
+            state = State.READY
+        elif ((command and command.startswith('start') or auto)
+              and state == State.READY):
+            # Really start the forwarder process
+            start_pipe.send('start')
+
+            # Start our main job
+            from Monitoring.MonitoringJob import start
+            monSvc = None
+            try:
+                _, monSvc = start()
+            except RuntimeError, e:
+                print e
+                state = State.ERROR
+                break
+
+            zmqSvc = monSvc.zmq()
+            input_socket = zmqSvc.socket(zmq.SUB)
+            input_socket.connect(filling_svc.service_connection())
+            input_socket.setsockopt(zmq.SUBSCRIBE, "")
+            poller.register(input_socket, zmq.POLLIN)
+
+            # Wait for forwarder to be ready
+            r = forward_pipe.poll(60)
+            if r:
+                forward_pipe.recv()
+            else:
+                state = State.ERROR
+                break
+
+            # Connect control connection
+            control = zmqSvc.socket(zmq.PAIR)
+            control.connect(filling_svc.control_connection())
+
+            state = State.RUNNING
+        elif (command and command.startswith('stop')
+              and state in (State.RUNNING, State.READY)):
+            if gaudi:
+                gaudi.stop()
+            state = State.READY
+        elif command and command.startswith('reset'):
+            if zmqSvc:
+                control.send("TERMINATE")
+                if forwarder.is_alive():
+                    forwarder.join()
+
+            if gaudi:
+                gaudi.finalize()
+            state = State.NOT_READY
+            break
+        elif command is not None:
+            print ('[ERROR]: DiskMonitor: bad transition '
+                   'from %s to %s' % (state, command))
+            state = State.ERROR
+            break
+
+        if (command is not None) or (auto and state in (State.NOT_READY,
+                                                        State.READY,
+                                                        State.RUNNING)):
+            # Once we've auto-started, set auto to off to prevent
+            # further auto actions, for example after a stop command
+            if auto and state == State.RUNNING:
+                auto = False
+            # Set the status
+            com.set_status(state)
+
+        # Handle input from the forwarder
+        if (input_socket and input_socket in rep
+                and rep[input_socket] == zmq.POLLIN):
+            info = input_socket.recv_string()
+            # Reset the histograms
+            for histo in histos.itervalues():
+                histo.Reset()
+
+            for k, v in nodes_info(info).iteritems():
+                (host, _, row, node) = k
+                total, free, gd = v[0], v[1], v[3]  # see nodes_info()
+                if gd != 2:
+                    continue
+                _, nt = node_type(*k[1:])
+                histos[nt].Fill((total - free) / total if total else 0.)
+
+            for histo in histos.itervalues():
+                monSvc.publishHistogram("DiskFilling", histo, add=False)
+
+    # Set our status one last time
+    com.set_status(state)
+
+
+if __name__ == '__main__':
+    run()
diff --git a/Online/Monitoring/python/Monitoring/Manager.py b/Online/Monitoring/python/Monitoring/Manager.py
index e5c0a60713080a217caa0819044ca53c941dc315..51328168bbafc452c9d026d10a7df060ff472457 100644
--- a/Online/Monitoring/python/Monitoring/Manager.py
+++ b/Online/Monitoring/python/Monitoring/Manager.py
@@ -5,6 +5,7 @@ import pydim
 import shlex
 from multiprocessing import Lock, Condition
 
+
 class State(object):
     UNKNOWN = "UNKNOWN"
     ERROR = "ERROR"
@@ -13,6 +14,7 @@ class State(object):
     RUNNING = "RUNNING"
     PAUSED = "PAUSED"
 
+
 class Manager(object):
     def __init__(self, utgid, command, env):
         self.state = State.UNKNOWN
@@ -23,10 +25,12 @@ class Manager(object):
         cmd = command
         if type(command) == str:
             cmd = shlex.split(command)
-        self.p = subprocess.Popen(cmd, env = os_env)
+        self.p = subprocess.Popen(cmd, env=os_env)
         print self.p.pid
 
-        self.__dim_svc = pydim.dic_info_service(utgid + '/status', "C", self.callback, pydim.MONITORED, 0, 0, None)
+        self.__dim_svc = pydim.dic_info_service(utgid + '/status', "C",
+                                                self.callback,
+                                                pydim.MONITORED, 0, 0, None)
         self.condition = Condition(Lock())
 
     def _notify(self):
@@ -34,7 +38,7 @@ class Manager(object):
         self.condition.notify()
         self.condition.release()
 
-    def send_command(self, command, wait = True):
+    def send_command(self, command, wait=True):
         if wait:
             self.condition.acquire()
             pydim.dic_cmnd_service(self.utgid, (command,), "C")
@@ -94,3 +98,7 @@ class Manager(object):
             self.state = State.NOT_READY
             print 'RESET'
             self._notify()
+        elif val.strip().startswith("ERROR"):
+            self.state = State.ERROR
+            print 'ERROR'
+            self._notify()
diff --git a/Online/Monitoring/python/Monitoring/MonitoringJob.py b/Online/Monitoring/python/Monitoring/MonitoringJob.py
index cf9bdeacd4093b751e498faf1e511ad0e39c34fb..9b66d3cf3d62be383fedf0e4bce65a195fb734ea 100644
--- a/Online/Monitoring/python/Monitoring/MonitoringJob.py
+++ b/Online/Monitoring/python/Monitoring/MonitoringJob.py
@@ -13,7 +13,7 @@ class MonitoringJob(ConfigurableUser):
                  "DimDNSNode": "mona08",
                  "Saver": True,
                  "Sender": True,
-                 "RegistrarConnection": None,
+                 "RegistrarConnection": "tcp://hist01:31360",
                  "HistogramDirectories": None}
 
     def connection(self, t, conType="ipc"):
@@ -117,8 +117,7 @@ class MonitoringJob(ConfigurableUser):
         saverSvc.UseRunInfoService = (self.getProp("RunInfoType") != "None")
         timeOut = 1. if (self.getProp("RunInfoType") != "Job") else 0.2
         saverSvc.RunInfoPollTimeout = timeOut
-        if self.isPropertySet("RegistrarConnection"):
-            saverSvc.RegistrarConnection = self.getProp("RegistrarConnection")
+        saverSvc.RegistrarConnection = self.getProp("RegistrarConnection")
         zmqSvcs.append(saverSvc)
 
         # Publish services, one per directory to make the histograms
@@ -137,35 +136,47 @@ class MonitoringJob(ConfigurableUser):
             svc.RunInPartitions = [self.getProp("Partition")]
 
 
-def start():
+def initialize(gaudi=None):
     mj = MonitoringJob()
-    from GaudiPython.Bindings import AppMgr, InterfaceCast, gbl
+
+    from GaudiPython.Bindings import AppMgr
     from Monitoring.RunDBInfoService import RunDBInfoService
     from Monitoring.MonitoringSvc import MonitoringSvc
 
-    gaudi = AppMgr()
+    if gaudi is None:
+        gaudi = AppMgr()
 
-    if mj.getProp("RunInfoType") == "RunDB":
-        RunDBInfoService("RunDBInfoService", Connection=mj.connection("info"))
+    monSvc = MonitoringSvc("MonitoringSvc")
 
-    monSvc = None
-    if mj.getProp("Sender"):
-        monSvc = MonitoringSvc("MonitoringSvc")
+    if mj.getProp("RunInfoType") == "RunDB":
+        RunDBInfoService("RunDBInfoService", Connection=mj.connection("info"),
+                         OutputLevel=1)
 
     # if we are running as part of GaudiOnline, it will fire the right
     # incident.
-    if mj.getProp("GaudiOnline"):
-        return gaudi, monSvc
+    if not mj.getProp("GaudiOnline"):
+        # Initialize and start Gaudi
+        sc = gaudi.initialize()
+        if not sc.isSuccess():
+            raise RuntimeError("Failed to initialize Gaudi")
+    return monSvc
 
-    # Initialize and start Gaudi
-    sc = gaudi.initialize()
-    if not sc.isSuccess():
-        raise RuntimeError("Failed to initialize Gaudi")
+
+def start():
+    from GaudiPython.Bindings import AppMgr, gbl, InterfaceCast
+    gaudi = AppMgr()
+
+    if gaudi.FSMState() < gbl.Gaudi.StateMachine.INITIALIZED:
+        monSvc = initialize(gaudi)
+    else:
+        # Trigger the decoration of AppMgr.service
+        from Monitoring import Service
+        monSvc = gaudi.service('MonitoringSvc')
 
     incSvc = gaudi.service("IncidentSvc").getInterface()
     incSvc = InterfaceCast(gbl.IIncidentSvc)(incSvc)
 
-    gaudi.start()
+    sc = gaudi.start()
     if not sc.isSuccess():
         raise RuntimeError("Failed to start Gaudi")
 
diff --git a/Online/Monitoring/python/Monitoring/MonitoringSvc.py b/Online/Monitoring/python/Monitoring/MonitoringSvc.py
index a8cd98c09d4f4376891da14732e35b16639dcef9..393dada0c34dedb9001731e537a8a36e76d3ab8b 100644
--- a/Online/Monitoring/python/Monitoring/MonitoringSvc.py
+++ b/Online/Monitoring/python/Monitoring/MonitoringSvc.py
@@ -24,13 +24,13 @@ class MonitoringSvc(PyService):
         t = type(histo)
         if (t not in self.__types):
             types = [str(ht) for ht in self.__types.keys()]
-            raise TypeError("Unsopported type of histogram:"
+            raise TypeError("Unsupported type of histogram:"
                             " %s, only %s are supported." %
                             (str(type(histo)), types))
         return self.__types[t]
 
     def zmq(self):
-        return self.__zmqSvc
+        return self.service(gbl.IZeroMQSvc, "ZeroMQSvc")
 
     def socket(self, t):
         return self.zmq().socket(t)
@@ -46,7 +46,6 @@ class MonitoringSvc(PyService):
         if self.RunNumberFromDIM:
             self.__dimSvc = self.service(gbl.DimMonitorSvc, "DimMonitorSvc")
 
-        self.__zmqSvc = self.service(gbl.IZeroMQSvc, "ZeroMQSvc")
         return sc
 
     def start(self):
@@ -68,7 +67,7 @@ class MonitoringSvc(PyService):
             self.__triggerSocket = self.socket(zmq.PAIR)
             self.__triggerSocket.setsockopt(zmq.LINGER, 0)
             self.__triggerSocket.connect("inproc://runNumber")
-            fun = gbl.DimHelper('int').dispatcher(self.__zmqSvc,
+            fun = gbl.DimHelper('int').dispatcher(self.zmq(),
                                                   self.__triggerSocket)
             self.__runNumber = monitor(self.RunNumberService, fun)
             # Start the thread that listens for run number triggers and
diff --git a/Online/Monitoring/python/Monitoring/PublicationListener.py b/Online/Monitoring/python/Monitoring/PublicationListener.py
index 69d0828e91f821e1bf866e7dfc8f1d3535eb82a6..b5877a6e76a26784660f1722d3593405a8e0591b 100644
--- a/Online/Monitoring/python/Monitoring/PublicationListener.py
+++ b/Online/Monitoring/python/Monitoring/PublicationListener.py
@@ -66,6 +66,7 @@ class PublicationListener(PyService):
 
         publishers = []
         publisher = 0
+        tries = {}
 
         timeo = 0
         wait_for_reply = False
@@ -85,11 +86,20 @@ class PublicationListener(PyService):
                 msg = ping.recv_string()
                 if msg == "PONG" and ping.more():
                     wait_for_reply = False
-                    msg = ping.recv('string')
-                    self.Verbose("Got PONG from app %s at %s" % (msg, address))
-                    if msg != publishers[publisher][0]:
-                        replace(context, *publishers[publisher], new=msg)
+                    app, host, pub_port, ping_port = publishers[publisher]
+                    remote_app = ping.recv('string')
+                    self.Verbose("Got PONG from app %s at %s" % (remote_app,
+                                                                 address))
+                    if remote_app != app:
+                        replace(context, app, host, pub_port, ping_port,
+                                remote_app)
+                        tries.pop((app, host))
+                        tries[(remote_app, host)] = 0
+                    else:
+                        tries[(app, host)] = 0
+
                     publisher += 1
+
                     if publisher == len(publishers):
                         timeo = self.__ping_freq
                     else:
@@ -108,6 +118,7 @@ class PublicationListener(PyService):
                 # list
                 if publisher == len(publishers):
                     publishers = request(context)
+                    tries = {(a, h): 0 for a, h, _, _ in publishers}
                     self.Verbose("Received publishers %s" % publishers)
                     # If we received a list of publishers, set the
                     # timeout to 0 to immediately start pinging,
@@ -119,20 +130,27 @@ class PublicationListener(PyService):
                     publisher = 0
                 # We were waiting for a reply to a ping, but none came
                 elif wait_for_reply:
-                    # A publisher is offline, send it to the server thread
-                    offline(context, *publishers[publisher])
+                    app, host, pub_port, ping_port = publishers[publisher]
                     # Since no reply was received, the ping socket is
                     # in a bad state, so make a new one
                     ping = make_ping()
                     wait_for_reply = False
-                    # Remove the offline publisher from our list
-                    publishers.pop(publisher)
-                    # If the offline publisher was the last one, wait longer
-                    if publisher == len(publishers):
-                        timeo = self.__ping_freq
+                    self.Debug("No reply to ping from %s on %s, %d tries." %
+                               (app, host, tries[(app, host)]))
+                    if tries[(app, host)] >= 2:
+                        # A publisher is offline, send it to the server thread
+                        offline(context, app, host, pub_port, ping_port)
+                        # Remove the offline publisher from our list
+                        publishers.pop(publisher)
+                        tries.pop((app, host))
+                        # If the offline publisher was the last one,
+                        # wait longer
+                        if publisher == len(publishers):
+                            timeo = self.__ping_freq
                 # The poll timeout is used to wait before sending the next ping
                 else:
-                    _, host, _, ping_port = publishers[publisher]
+                    app, host, _, ping_port = publishers[publisher]
+                    tries[(app, host)] += 1
                     address = "tcp://%s:%d" % (host, ping_port)
                     self.Verbose("Sending PING to %s" % address)
                     ping.connect(address)
diff --git a/Online/Monitoring/python/Monitoring/PublicationRegistrar.py b/Online/Monitoring/python/Monitoring/PublicationRegistrar.py
index ce870360a946f645e3aea20ea787204053dcb5cb..ff0015f8375eb77ce2e21791a90357b4cd9d8b53 100644
--- a/Online/Monitoring/python/Monitoring/PublicationRegistrar.py
+++ b/Online/Monitoring/python/Monitoring/PublicationRegistrar.py
@@ -204,6 +204,8 @@ class PublicationRegistrar(PublicationListener):
                         external.send_multipart(["AVAILABLE"] + info)
                     else:
                         external.send("UNKNOWN")
+                elif msg == "PING":
+                    external.send_multipart(["PONG", "Registrar"])
 
             # Message from ping thread, either request for list of
             # publishers or message that one of them is offline.
diff --git a/Online/Monitoring/python/Monitoring/RunDBInfoServer.py b/Online/Monitoring/python/Monitoring/RunDBInfoServer.py
index 24c5704537992b143d7350a23e5ee6c1e3f78442..5a55f488646bb13240b52831d732a7c869ed238d 100644
--- a/Online/Monitoring/python/Monitoring/RunDBInfoServer.py
+++ b/Online/Monitoring/python/Monitoring/RunDBInfoServer.py
@@ -2,7 +2,7 @@ import os
 from Communicator import (Communicator, State)
 
 
-def run(connection="ipc:///tmp/test"):
+def run(connection="ipc:///tmp/test", outputLevel=3):
     # Start the communicator:
     com = Communicator('RUNDBINFOSERV')
 
@@ -31,7 +31,8 @@ def run(connection="ipc:///tmp/test"):
             from RunDBInfoService import RunDBInfoService
             gaudi = AppMgr()
             RunDBInfoService("RunDBInfoService",
-                             Connection=connection)
+                             Connection=connection,
+                             OutputLevel=outputLevel)
             gaudi.initialize()
 
             state = State.READY
diff --git a/Online/Monitoring/python/Monitoring/RunDBInfoService.py b/Online/Monitoring/python/Monitoring/RunDBInfoService.py
index 9b9b3456fe7ba8679cbc26e05f2e8b7a959faf4a..9c04ffe1eee01363aa87e00998671eadc5559664 100644
--- a/Online/Monitoring/python/Monitoring/RunDBInfoService.py
+++ b/Online/Monitoring/python/Monitoring/RunDBInfoService.py
@@ -48,8 +48,16 @@ class RunDBInfoService(PyService):
     def function(self):
         from Monitoring.decorators import zmq
         self.Info("Started RunDB info thread.")
-        socket = self.zmq().socket(zmq.REP)
-        socket.bind(self.Connection)
+
+        def makeSocket():
+            socket = self.zmq().socket(zmq.REP)
+            socket.setsockopt(zmq.LINGER, 0)
+            socket.setsockopt(zmq.RCVTIMEO, 2000)
+            socket.setsockopt(zmq.SNDTIMEO, 500)
+            socket.bind(self.Connection)
+            return socket
+
+        socket = makeSocket()
 
         poller = zmq.Poller()
         poller.register(socket, zmq.POLLIN)
@@ -64,20 +72,41 @@ class RunDBInfoService(PyService):
                 self.Verbose("No run info requests in the last 10 seconds.")
                 continue
 
-            msg = socket.recv_string()
+            msg = None
+            try:
+                msg = socket.recv_string()
+            except Exception:
+                self.Warning("Failed to receive message, recreating socket.")
+                poller.unregister(socket)
+                socket = makeSocket()
+                poller.register(socket, zmq.POLLIN)
+                continue
             if msg == "TERMINATE":
                 socket.send("")
                 break
             run = None
+            application = None
 
             if msg != Monitoring.s_RunInfo:
                 self.Warning('Unknown info request: %s' % msg)
                 while socket.more():
                     socket.recv_message()
+                poller.unregister(socket)
+                socket = makeSocket()
+                poller.register(socket, zmq.POLLIN)
+                continue
+
+            try:
+                run = socket.recv(long)
+                application = socket.recv_string()
+            except Exception:
+                self.Warning(
+                    "Failed to receive run and application, recreating socket.")
+                poller.unregister(socket)
+                socket = makeSocket()
+                poller.register(socket, zmq.POLLIN)
                 continue
 
-            run = socket.recv(long)
-            application = socket.recv_string()
             # See if we got an integer as request, if not reply with INVALID
 
             # We got a run number, check if we already know its info, if not,
@@ -96,12 +125,25 @@ class RunDBInfoService(PyService):
                     run_info.deadtime = ri.get('avPhysDeadTime', -1)
                     infos[run] = run_info
                 except (RequestError, TypeError):
-                    socket.send_string("UNKNOWN")
-                    continue
+                    try:
+                        socket.send_string("UNKNOWN")
+                    except Exception:
+                        self.Warning(
+                            "Failed to send UNKNOWN, recreating socket.")
+                        poller.unregister(socket)
+                        socket = makeSocket()
+                        poller.register(socket, zmq.POLLIN)
+                    continue  # no run info available: never send the KNOWN reply
             else:
                 run_info = infos[run]
 
             # Get the deadtime and reply with it.
-            socket.send_multipart([Monitoring.s_Known, run_info])
+            try:
+                socket.send_multipart([Monitoring.s_Known, run_info])
+            except Exception:
+                self.Warning("Failed to send reply, recreating socket.")
+                poller.unregister(socket)
+                socket = makeSocket()
+                poller.register(socket, zmq.POLLIN)
 
         return SUCCESS
diff --git a/Online/Monitoring/python/Monitoring/Service.py b/Online/Monitoring/python/Monitoring/Service.py
index 162796fa7249777315aa6f231f69fa95c0fa1a2e..97424dc76793873d859510b49feef77c98d5aa80 100644
--- a/Online/Monitoring/python/Monitoring/Service.py
+++ b/Online/Monitoring/python/Monitoring/Service.py
@@ -1,3 +1,4 @@
+from functools import wraps
 from GaudiPython.Bindings import iService, AppMgr, InterfaceCast, Helper
 from GaudiPython.GaudiAlgs import _hasProperty_
 import cppyy
@@ -7,6 +8,36 @@ gInterpreter = gbl.gInterpreter
 gInterpreter.Declare('#include <Monitoring/PyService.h>')
 
 
+def static_vars(**kwargs):
+    """ Add an attribute to the function that can be used as a static
+    variable. """
+    def decorate(func):
+        for k in kwargs:
+            setattr(func, k, kwargs[k])
+        return func
+    return decorate
+
+
+def _service_wrapper(func):
+    """
+    Wrap the AppMgr.service call to return GaudiPython-based
+    instances of services if they exist.
+    """
+    @wraps(func)
+    @static_vars(orig=func)
+    def wrapper(self, name, interface=None):
+        if ('GaudiPythonServices' in self.__dict__
+                and name in self.__dict__['GaudiPythonServices']):
+            return self.__dict__['GaudiPythonServices'][name]
+        else:
+            return wrapper.orig(self, name, interface)
+
+    return wrapper
+
+
+AppMgr.service = _service_wrapper(AppMgr.service)
+
+
 def _init_(self, name, **args):
     """
     The constructor from a unique service instance name & parameters
diff --git a/Online/Monitoring/python/Monitoring/TestUtilities.py b/Online/Monitoring/python/Monitoring/TestUtilities.py
index 38f8bd7cc3f7db0db8dba672d8e1b365f933e4c7..37ebbce025982049fa5c2f66c6c81e83ee7e8cdd 100644
--- a/Online/Monitoring/python/Monitoring/TestUtilities.py
+++ b/Online/Monitoring/python/Monitoring/TestUtilities.py
@@ -138,10 +138,14 @@ class Publisher(object):
                     print 'Got unknown control message:', msg
 
             if not sockets:
-                if n < len(self.__files):
+                if self.__files and n < len(self.__files):
                     filename = self.__files[n]
+                elif self.__files:
+                    print 'Publishing done.'
+                    break
                 else:
                     filename = "/tmp/test_%d" % n
+                print 'Publishing %s' % filename
                 publish.send(filename)
                 n += 1
 
@@ -152,9 +156,10 @@ class Publisher(object):
 
 class Registrar(object):
 
-    def __init__(self, reg_con="ipc:///tmp/registrar"):
+    def __init__(self, reg_con="ipc:///tmp/registrar", **kwargs):
         self.__reg_con = reg_con
         self.__reg = None
+        self.__args = kwargs
 
     def reg_con(self):
         return self.__reg_con
@@ -164,8 +169,8 @@ class Registrar(object):
             from PublicationRegistrar import PublicationRegistrar
             self.__reg = PublicationRegistrar("PublicationRegistrar",
                                               StartPort=31361,
-                                              OutputLevel=2,
-                                              RegistrarConnection=self.reg_con())
+                                              RegistrarConnection=self.reg_con(),
+                                              **self.__args)
             self.__reg.initialize()
         self.__reg.start()
 
diff --git a/Online/Monitoring/scripts/DiskMonitor.py b/Online/Monitoring/scripts/DiskMonitor.py
deleted file mode 100644
index 16c5e59561a8adfa6beebb68aa1c5863c98b6bd5..0000000000000000000000000000000000000000
--- a/Online/Monitoring/scripts/DiskMonitor.py
+++ /dev/null
@@ -1,99 +0,0 @@
-import atexit
-import re
-import socket
-from Monitoring.DimMonitor import DimForwarder
-from Monitoring.decorators import zmq, gbl
-from Hlt2Monitoring.Utilities import node_type
-from Configurables import MonitoringJob
-from multiprocessing import Process
-from ROOT import TH1D
-from ROOT import kOrange, kGreen, kBlue, kMagenta
-
-filling_svc = DimForwarder('FarmStatus/StorageStatus', dim_dns_node="ecs03",
-                           connection_path="/tmp/DiskMonitor")
-p = Process(target=filling_svc)
-p.start()
-atexit.register(p.terminate)
-
-mj = MonitoringJob()
-mj.JobName = "DiskFilling"
-mj.SavePath = "/tmp/histograms"
-mj.RegistrarConnection = "tcp://%s:31360" % socket.gethostname()
-# Run the DIM monitoring service in another service to allow the DIM DNS node
-# to be ecs03 while we need mona08 to publish to the presenter
-
-# Start our main job
-from Monitoring.MonitoringJob import start
-gaudi, monSvc = start()
-
-
-def nodes_info(val):
-    node_re = re.compile(
-        r"hlt(?P<rack>[a-f])(?P<row>[0-9]{2})(?P<node>[0-9]{2})")
-    info = {}
-    s = val.split('|')
-    if not s:
-        return info
-    for entry in s:
-        if not entry:
-            continue
-        if not entry[0].isalnum():
-            break
-        host, numbers = entry.split(' ')
-        m = node_re.match(host)
-        if not m:
-            continue
-        n = numbers.split('/')
-        total, free, nd, gd = float(n[0]), float(n[1]), int(n[2]), int(n[3])
-        info[(host, m.group('rack'), int(m.group('row')),
-              int(m.group('node')))] = (total, free, nd, gd)
-    return info
-
-
-plots = {0: ("slow", kMagenta - 3),
-         1: ("medium", kOrange + 5),
-         2: ("fast", kBlue - 3),
-         3: ("faster", kGreen - 3)}
-
-histos = {}
-for nt, (name, color) in plots.iteritems():
-    histo = TH1D(name, name, 101, -0.05, 1.05)
-    histo.SetLineColor(color)
-    histo.SetLineWidth(2)
-    histos[nt] = histo
-
-zmqSvc = monSvc.zmq()
-input_socket = zmqSvc.socket(zmq.SUB)
-input_socket.connect(filling_svc.service_connection())
-input_socket.setsockopt(zmq.SUBSCRIBE, "")
-
-control = zmqSvc.socket(zmq.PAIR)
-control.connect(filling_svc.service_connection())
-
-while True:
-    info = None
-    try:
-        info = input_socket.recv_string()
-    except KeyboardInterrupt:
-        break
-
-    # Reset the histograms
-    for histo in histos.itervalues():
-        histo.Reset()
-
-    for k, v in nodes_info(info).iteritems():
-        rd = {k: i + 1 for i, k in enumerate('abcdef')}
-        (host, _, row, node) = k
-        gd = v[3]
-        if gd != 2:
-            continue
-        _, nt = node_type(*k[1:])
-        histos[nt].Fill((v[0] - v[1]) / v[0] if v[3] else 0.)
-
-    for histo in histos.itervalues():
-        monSvc.publishHistogram("DiskFilling", histo, add=False)
-
-zmqSvc.send(control, "TERMINATE")
-p.join()
-gaudi.finalize()
-gaudi.exit()
diff --git a/Online/Monitoring/scripts/registrar.py b/Online/Monitoring/scripts/registrar.py
index 9e37ce4f4bb18e4fa3e2c5ba02422b5d08fbac9d..0d0b7ce495f5d37381d802af3778493e7c1ad138 100644
--- a/Online/Monitoring/scripts/registrar.py
+++ b/Online/Monitoring/scripts/registrar.py
@@ -15,12 +15,7 @@ context = zmqSvc.context()
 hostname = socket.gethostname()
 reg_con = "tcp://*:31360"
 
-
-def ctrl_con(suffix):
-    return "inproc://control_%s" % suffix
-
-
-registrar = Registrar(reg_con)
+registrar = Registrar(reg_con, OutputLevel=2)
 print "Start registrar"
 registrar.start()
 
diff --git a/Online/Monitoring/scripts/registrar_request.py b/Online/Monitoring/scripts/registrar_request.py
new file mode 100644
index 0000000000000000000000000000000000000000..cae55455d9ec26902cb4863a705cc98996705d08
--- /dev/null
+++ b/Online/Monitoring/scripts/registrar_request.py
@@ -0,0 +1,51 @@
+import pprint
+import socket
+import argparse
+from itertools import izip_longest
+from Monitoring.decorators import zmq
+from GaudiPython.Bindings import AppMgr, InterfaceCast, gbl
+
+
+parser = argparse.ArgumentParser(usage='%(prog)s [-c CON] [app]')
+parser.add_argument("app", type=str, nargs='?', default="")
+parser.add_argument("-c", "--connection", type=str, dest="con",
+                    default="tcp://hist01:31360")
+args = parser.parse_args()
+
+
+gaudi = AppMgr()
+zmqSvc = gaudi.createSvc("ZeroMQSvc")
+zmqSvc = InterfaceCast(gbl.IZeroMQSvc)(zmqSvc)
+gaudi.initialize()
+gaudi.start()
+
+context = zmqSvc.context()
+
+hostname = socket.gethostname()
+
+req = context.socket(zmq.REQ)
+req.connect(args.con)
+
+
+def request(app):
+    msg = ["REQUEST"]
+    if app:
+        msg += [app]
+    req.send_multipart(msg)
+    msg = req.recv_string()
+    if msg == "AVAILABLE":
+        rest = []
+        while req.more():
+            rest.append(req.recv_string())
+        return rest
+    else:
+        return []
+
+
+def grouper(iterable, n, fillvalue=None):
+    args = [iter(iterable)] * n
+    return izip_longest(*args, fillvalue=fillvalue)
+
+
+for app in grouper(request(args.app), 4):
+    print ' '.join(field for field in app if field is not None)
diff --git a/Online/Monitoring/scripts/test_disk_monitor.py b/Online/Monitoring/scripts/test_disk_monitor.py
new file mode 100644
index 0000000000000000000000000000000000000000..b8d04092818eff0bb1d948ac03cadc9944542a10
--- /dev/null
+++ b/Online/Monitoring/scripts/test_disk_monitor.py
@@ -0,0 +1,22 @@
+import os
+import atexit
+from Hlt2Monitoring.Manager import Manager
+
+utgid = 'TEST_DISKMONITOR_00'
+
+cmd = """
+import socket
+from Monitoring import DiskMonitor
+DiskMonitor.run(SavePath='/tmp/histograms',
+                RegistrarConnection='tcp://%s:31360' % socket.gethostname(),
+                DimDNSNode='mona08')
+"""
+
+os.environ['DIM_DNS_NODE'] = 'mona08'
+env = {'LC_ALL': 'C', 'UTGID': utgid, 'PARTITION': 'LHCb',
+       'PARTITION_NAME': 'LHCb'}
+
+command = 'python -c "%s"' % cmd
+manager = Manager(utgid, command, env)
+
+atexit.register(manager.terminate)
diff --git a/Online/Monitoring/scripts/test_poller.py b/Online/Monitoring/scripts/test_poller.py
new file mode 100644
index 0000000000000000000000000000000000000000..fb58f400ce73e00a95fbe2c9e5675f03677b1d81
--- /dev/null
+++ b/Online/Monitoring/scripts/test_poller.py
@@ -0,0 +1,92 @@
+import random
+from time import sleep
+from multiprocessing import Process, Pipe
+from ZeroMQ.decorators import zmq
+from GaudiPython.Bindings import AppMgr, InterfaceCast, gbl
+
+gaudi = AppMgr()
+zmqSvc = gaudi.createSvc("ZeroMQSvc")
+zmqSvc = InterfaceCast(gbl.IZeroMQSvc)(zmqSvc)
+gaudi.initialize()
+
+
+def zmq_send():
+    gaudi.start()
+    context = zmqSvc.context()
+
+    sockets = []
+    for i in range(1, 3):
+        s = context.socket(zmq.PAIR)
+        s.connect('ipc:///tmp/test%d' % i)
+        sockets.append(s)
+
+    for i in range(5):
+        for s in sockets:
+            s.send(str(i))
+        sleep(1)
+    gaudi.stop()
+    gaudi.finalize()
+    gaudi.exit()
+
+
+zmq_p = Process(target=zmq_send)
+zmq_p.start()
+
+gaudi.start()
+context = zmqSvc.context()
+
+
+def pipe_send(p, s):
+    random.seed(s)
+    for i in range(3):
+        p.send(i)
+        sleep(random.randint(1, 4))
+
+
+processes = {}
+for i in range(3):
+    parent_con, child_con = Pipe()
+    p = Process(target=pipe_send, args=(child_con, i))
+    processes[i] = (p, parent_con)
+
+
+sockets = []
+for i in range(1, 3):
+    s = context.socket(zmq.PAIR)
+    s.bind('ipc:///tmp/test%d' % i)
+    sockets.append(s)
+
+poller = zmq.Poller()
+for socket in sockets:
+    poller.register(socket, zmq.POLLIN)
+
+for i, (p, pipe) in processes.iteritems():
+    if i < 3:
+        poller.register(pipe, zmq.POLLIN)
+    else:
+        poller.register(pipe.fileno(), zmq.POLLIN)
+    p.start()
+
+n = 0
+while n < 10:
+    rep = dict(poller.poll())
+    for s in sockets:
+        if s in rep and rep[s] == zmq.POLLIN:
+            print 'from socket', s.recv_string()
+    for _, s in processes.itervalues():
+        if s in rep and rep[s] == zmq.POLLIN:
+            print 'from pipe  ', s.recv()
+    n += 1
+
+
+for socket in sockets:
+    poller.unregister(socket)
+
+for i, (p, pipe) in processes.iteritems():
+    if i < 3:
+        poller.unregister(pipe)
+    else:
+        poller.unregister(pipe.fileno())
+    p.join()
+
+zmq_p.join()
diff --git a/Online/Monitoring/scripts/test_poller_rep.py b/Online/Monitoring/scripts/test_poller_rep.py
deleted file mode 100644
index cda179e6d3b31ccf62d475124ec9f7986a0dbe42..0000000000000000000000000000000000000000
--- a/Online/Monitoring/scripts/test_poller_rep.py
+++ /dev/null
@@ -1,24 +0,0 @@
-from ZeroMQ.decorators import zmq
-
-
-from GaudiPython.Bindings import AppMgr, InterfaceCast, gbl
-
-gaudi = AppMgr()
-zmqSvc = gaudi.createSvc("ZeroMQSvc")
-zmqSvc = InterfaceCast(gbl.IZeroMQSvc)(zmqSvc)
-gaudi.initialize()
-gaudi.start()
-
-context = zmqSvc.context()
-
-sockets = []
-for i in range(1, 3):
-    s = context.socket(zmq.PAIR)
-    s.bind('ipc:///tmp/test%d' % i)
-    sockets.append(s)
-
-poller = zmq.Poller()
-for socket in sockets:
-    poller.register(socket, zmq.POLLIN)
-
-rep = dict(poller.poll())
diff --git a/Online/Monitoring/scripts/test_poller_req.py b/Online/Monitoring/scripts/test_poller_req.py
deleted file mode 100644
index 718687f1966e628a38edcf7dfe6ad38ea0648ec5..0000000000000000000000000000000000000000
--- a/Online/Monitoring/scripts/test_poller_req.py
+++ /dev/null
@@ -1,16 +0,0 @@
-import zmq
-import time
-
-
-context = zmq.Context()
-sockets = []
-for i in range(1, 3):
-    s = context.socket(zmq.PAIR)
-    s.connect('ipc:///tmp/test%d' % i)
-    sockets.append(s)
-
-for i in range(5):
-    print i
-    for s in sockets:
-        s.send(str(i))
-    time.sleep(1)
diff --git a/Online/Monitoring/scripts/test_rundb.py b/Online/Monitoring/scripts/test_rundb.py
index 4e90539b30012f5f17cdd43356c0f07b79c33dcc..e01298e91eb58ac458f91c79d6053539f9e1a072 100644
--- a/Online/Monitoring/scripts/test_rundb.py
+++ b/Online/Monitoring/scripts/test_rundb.py
@@ -1,3 +1,4 @@
+import time
 from Configurables import ApplicationMgr
 ApplicationMgr().ExtSvc += ['ZeroMQSvc']
 
@@ -11,8 +12,8 @@ gaudi.initialize()
 gaudi.start()
 
 context = zmqSvc.context()
-run = 177444
-connection = 'ipc:///tmp/testRunDB'
+runs = [177444 + i for i in range(10)]
+connection = 'ipc:///run/HLT2/testRunDB'
 
 req = context.socket(zmq.REQ)
 req.connect(connection)
@@ -20,15 +21,17 @@ req.connect(connection)
 poller = zmq.Poller()
 poller.register(req, zmq.POLLIN)
 
-req.send_multipart(["RUNINFO", long(run), "TEST"])
+for run in runs:
+    req.send_multipart(["RUNINFO", long(run), "TEST"])
 
-sockets = dict(poller.poll(2000))
-if req in sockets and sockets[req] == zmq.POLLIN:
-    msg = req.recv_string()
-    if msg == "KNOWN":
-        run_info = req.recv(gbl.Monitoring.RunInfo)
-        print 'Got run info for run %d: %f' % (run, run_info.deadtime)
+    sockets = dict(poller.poll(2000))
+    if req in sockets and sockets[req] == zmq.POLLIN:
+        msg = req.recv_string()
+        if msg == "KNOWN":
+            run_info = req.recv(gbl.Monitoring.RunInfo)
+            print 'Got run info for run %d: %f' % (run, run_info.deadtime)
+        else:
+            print msg
     else:
-        print msg
-else:
-    print 'No reply from run DB server on %s' % connection
+        print 'No reply from run DB server on %s' % connection
+    time.sleep(2)
diff --git a/Online/Monitoring/scripts/test_rundb_info_serv.py b/Online/Monitoring/scripts/test_rundb_info_serv.py
index e2825b211ff1cbe779458a8b64d7ec3299c45732..ff779725c81ae46cc0c2f01374976d2b06d5e29f 100644
--- a/Online/Monitoring/scripts/test_rundb_info_serv.py
+++ b/Online/Monitoring/scripts/test_rundb_info_serv.py
@@ -4,13 +4,13 @@ from Monitoring.Manager import Manager
 
 utgid = 'TEST_RUNDBINFOSERV_00'
 
-connection = 'ipc:///tmp/test'
-os.environ['DIM_DNS_NODE'] = 'localhost'
+connection = 'ipc:///run/HLT2/testRunDB'
+os.environ['DIM_DNS_NODE'] = 'hlt01'
 cmd = """import GaudiKernel.ProcessJobOptions
 from Gaudi.Configuration import importOptions
 GaudiKernel.ProcessJobOptions.printing_level=3
 from Monitoring import RunDBInfoServer
-RunDBInfoServer.run('%(connection)s')
+RunDBInfoServer.run('%(connection)s', 2)
 """ % {'connection': connection}
 env = {'LC_ALL': 'C', 'UTGID': utgid,
        'PARTITION': 'LHCb2',
diff --git a/Online/ZeroMQ/ZeroMQ/ZeroMQPoller.h b/Online/ZeroMQ/ZeroMQ/ZeroMQPoller.h
index 465dcd919c5cb32066526ef977a48df7a49ba687..44aaf2d010152d56e8a103f35d05e74ddc25f827 100644
--- a/Online/ZeroMQ/ZeroMQ/ZeroMQPoller.h
+++ b/Online/ZeroMQ/ZeroMQ/ZeroMQPoller.h
@@ -12,28 +12,51 @@
 class ZeroMQPoller {
 public:
 
-   using entry_t = std::tuple<const zmq::socket_t*, size_t, zmq::PollType>;
+   using entry_t = std::tuple<size_t, zmq::PollType, const zmq::socket_t*>;
    // The key is what zmq::socket_t stores inside, and what goes into
    // pollitem_t through zmq::socket_t's conversion to void* operator
    using sockets_t = std::unordered_map<void*, entry_t>;
+
+   using fd_entry_t = std::tuple<size_t, zmq::PollType>;
+   using fds_t = std::unordered_map<int, fd_entry_t>;
+
    using free_t = std::deque<int>;
 
    ZeroMQPoller() = default;
 
-   // ZeroMQPoller(sockets_t sockets)
-   //    : m_sockets{std::move(sockets)} {}
-
    std::vector<std::pair<size_t, int>> poll(int timeo = -1)
    {
       std::vector<std::pair<size_t, int>> r;
-      if (m_sockets.empty()) throw std::runtime_error("No sockets registered");
-      zmq::poll(&m_items[0], m_items.size(), timeo);
+      if (m_items.empty()) {
+         throw std::runtime_error("No sockets registered");
+      }
+      while (true) {
+         try {
+            auto n = zmq::poll(&m_items[0], m_items.size(), timeo);
+            if (n == 0) return r;
+            break;
+         } catch (const zmq::error_t& e) {
+            if (e.num() != EINTR) {
+               std::cerr << e.what() << std::endl;
+               throw;
+            }
+         }
+      }
       // TODO: replace this with ranges::v3::zip
       for (size_t i = 0; i < m_items.size(); ++i) {
-         auto socket = static_cast<zmq::socket_t*>(m_items[i].socket);
-         auto entry = m_sockets[socket];
-         if (m_items[i].revents & short(std::get<2>(entry))) {
-            r.emplace_back(std::get<1>(entry), std::get<2>(entry));
+         void* socket = m_items[i].socket;
+         size_t index = 0;
+         int flags = 0;
+         if (socket == nullptr) {
+            // an fd was registered
+            std::tie(index, flags) = m_fds[m_items[i].fd];
+         } else {
+            // a socket was registered
+            const zmq::socket_t* s;
+            std::tie(index, flags, s) = m_sockets[socket];
+         }
+         if (m_items[i].revents & short(flags)) {
+            r.emplace_back(index, flags);
          }
       }
       return r;
@@ -41,7 +64,7 @@ public:
 
    size_t size() const
    {
-      return m_sockets.size();
+      return m_items.size();
    }
 
    size_t register_socket(zmq::socket_t& socket, zmq::PollType type)
@@ -49,19 +72,37 @@ public:
       zmq::socket_t* s = &socket;
       auto it = m_sockets.find(s);
       if (it != m_sockets.end()) {
-         return std::get<1>(it->second);
+         return std::get<0>(it->second);
       }
-      size_t index = m_free.empty() ? m_sockets.size() : m_free.front();
+      size_t index = m_free.empty() ? m_items.size() : m_free.front();
       if (!m_free.empty()) m_free.pop_front();
       // NOTE: tis uses the conversion-to-void* operator of
       // zmq::socket_t, which returns the wrapped object
       m_items.push_back({socket, 0, type, 0});
 
       // We need to lookup by the pointer to the object wrapped by zmq::socket_t
-      m_sockets.emplace(m_items.back().socket, std::make_tuple(s, index, type));
+      m_sockets.emplace(m_items.back().socket, std::make_tuple(index, type, s));
+      return index;
+   }
+
+   size_t register_socket(int fd, zmq::PollType type)
+   {
+      auto it = m_fds.find(fd);
+      if (it != m_fds.end()) {
+         return std::get<0>(it->second);
+      }
+      size_t index = m_free.empty() ? m_items.size() : m_free.front();
+      if (!m_free.empty()) m_free.pop_front();
+      // NOTE: the socket member is left null for a raw file descriptor;
+      // poll() uses that to tell fd entries apart from socket entries
+      m_items.push_back({nullptr, fd, type, 0});
+
+      // File descriptors are looked up by their integer value
+      m_fds.emplace(fd, std::make_tuple(index, type));
+      return index;
+   }
 
+
    size_t unregister_socket(zmq::socket_t& socket)
    {
       if (!m_sockets.count(socket.operator void*())) {
@@ -73,9 +114,9 @@ public:
       // (zmq::socket_t)
       auto it = std::find_if(begin(m_sockets), end(m_sockets),
                              [&socket](const decltype(m_sockets)::value_type& entry) {
-                                return &socket == std::get<0>(entry.second);
+                                return &socket == std::get<2>(entry.second);
                              });
-      auto index = std::get<1>(it->second);
+      auto index = std::get<0>(it->second);
       m_free.push_back(index);
       m_sockets.erase(it);
 
@@ -89,12 +130,37 @@ public:
       return index;
    }
 
+   // Unregister a file descriptor and return the index it was registered under.
+   // Throws std::out_of_range if fd was never registered.
+   size_t unregister_socket(int fd)
+   {
+      auto it = m_fds.find(fd);
+      if (it == m_fds.end()) {
+         throw std::out_of_range("fileno is not registered");
+      }
+      auto index = std::get<0>(it->second);
+      m_free.push_back(index);
+      m_fds.erase(it);   // `it` is invalid from here on; compare against fd
+
+      // Remove from m_items; socket entries carry fd == 0, so skip them
+      auto iit = std::find_if(begin(m_items), end(m_items), [fd](const zmq::pollitem_t& item) {
+            return item.socket == nullptr && item.fd == fd;
+         });
+      assert(iit != end(m_items));
+      m_items.erase(iit);
+      return index;
+   }
+
+
 private:
 
    // Vector of (socket, flags)
-   free_t m_free;
    std::vector<zmq::pollitem_t> m_items;
    sockets_t m_sockets;
+   fds_t m_fds;
+
+   // free slots in items
+   free_t m_free;
 };
 
 #endif // ZEROMQPOLLER_H
diff --git a/Online/ZeroMQ/python/ZeroMQ/decorators.py b/Online/ZeroMQ/python/ZeroMQ/decorators.py
index ed11fdeae7f7a60890fa182b18cb99eeac45b1e9..136d8368a945e51609ab6b699edfc5a31cb65341 100644
--- a/Online/ZeroMQ/python/ZeroMQ/decorators.py
+++ b/Online/ZeroMQ/python/ZeroMQ/decorators.py
@@ -1,11 +1,15 @@
+import os
 import cppyy
 import pickle
+import errno
+from functools import wraps
 
 gbl = cppyy.gbl
 
 gbl.ROOT.EnableThreadSafety()
 gInterpreter = gbl.gInterpreter
 gInterpreter.Load('libzmq.so')
+gInterpreter.Load('libZMQ.so')
 gInterpreter.Declare('#include <ZeroMQ/IZeroMQSvc.h>')
 gInterpreter.Declare('#include <ZeroMQ/ZeroMQHelper.h>')
 gInterpreter.Declare('#include <ZeroMQ/ZeroMQPoller.h>')
@@ -15,9 +19,12 @@ gInterpreter.Declare('#include <ZeroMQ/functions.h>')
 IZeroMQSvc = cppyy.gbl.IZeroMQSvc
 Helper = cppyy.gbl.ZeroMQHelper
 TObject = cppyy.gbl.TObject
+zmq = cppyy.gbl.zmq
 
 
 def static_vars(**kwargs):
+    """ Add an attribute to the function that can be used as a static
+    variable. """
     def decorate(func):
         for k in kwargs:
             setattr(func, k, kwargs[k])
@@ -25,6 +32,19 @@ def static_vars(**kwargs):
     return decorate
 
 
+def protect_interrupt(func):
+    """ Protect against interrupted system call exceptions by trying again."""
+    @wraps(func)
+    def wrapper(*args, **kwargs):
+        while True:
+            try:
+                return func(*args, **kwargs)
+            except IOError, e:
+                if e.errno != errno.EINTR:
+                    raise
+    return wrapper
+
+
 @static_vars(svc=None)
 def _get_svc():
     if _get_svc.svc is None:
@@ -51,6 +71,7 @@ def _recv(self, T):
     # This method may block, so inform TMethodProxy that the GIL should be
     # released
     recv._threaded = True
+    recv = protect_interrupt(recv)
     r = recv(_get_svc(), self)
     self._more = r.second
     return r.first
@@ -104,7 +125,7 @@ def _recv_string(self):
 
 
 def _recv_message(self):
-    return _recv(self, cppyy.gbl.zmq.message_t)
+    return _recv(self, zmq.message_t)
 
 
 def _socket_init(self, context, t):
@@ -125,15 +146,40 @@ def _init_(self):
 
 
 def _register(self, socket, flags):
-    i = self.register_socket(socket, flags)
+    i = None
+    if isinstance(socket, zmq.socket_t):
+        i = self.register_socket(socket, flags)
+    elif hasattr(socket, 'fileno'):
+        i = self.register_socket(socket.fileno(), flags)
+    elif type(socket) == int:
+        try:
+            os.fstat(socket)  # raises OSError if this is not an open descriptor
+            i = self.register_socket(socket, flags)
+        except OSError:
+            pass  # leave i as None so the TypeError below is raised
+    if i is None:
+        raise TypeError("Socket must be either zmq::socket_t, have a"
+                        " fileno function that returns a valid file"
+                        " descriptor, or be a valid file descriptor.")
     self._sockets[int(i)] = socket
 
 
 def _unregister(self, socket):
-    i = self.unregister_socket(socket)
+    i = None
+    if isinstance(socket, zmq.socket_t):
+        i = self.unregister_socket(socket)
+    elif hasattr(socket, 'fileno'):
+        i = self.unregister_socket(socket.fileno())
+    elif type(socket) == int:
+        i = self.unregister_socket(socket)
+    else:
+        raise TypeError("Socket must be either zmq::socket_t, have a"
+                        " fileno function that returns a valid file"
+                        " descriptor, or be a valid file descriptor.")
     self._sockets.pop(int(i))
 
 
+@protect_interrupt
 def _poll(self, timeout=-1):
     sockets = self._cpp_poll_(timeout)
     return {self._sockets[int(s.first)]: s.second for s in sockets}
@@ -148,11 +194,6 @@ Poller.poll = _poll
 Poller.register = _register
 Poller.unregister = _unregister
 
-# Convenience functions to for setsockopt
-# Library load seems to be necessary...
-cppyy.gbl.gSystem.Load('libZMQ.so')
-
-zmq = cppyy.gbl.zmq
 zmq.Poller = Poller
 
 # Decorate socket_t