diff --git a/PhysicsAnalysis/JetTagging/JetTagTools/CMakeLists.txt b/PhysicsAnalysis/JetTagging/JetTagTools/CMakeLists.txt
index 7d3c90c2232dd5a9e253e4a4512fa30ff7cea734..6ebe0c92bb000adc70c1d31c459b614394763979 100644
--- a/PhysicsAnalysis/JetTagging/JetTagTools/CMakeLists.txt
+++ b/PhysicsAnalysis/JetTagging/JetTagTools/CMakeLists.txt
@@ -55,6 +55,7 @@ atlas_depends_on_subdirs(
 find_package( Boost COMPONENTS filesystem thread system )
 find_package( CLHEP )
 find_package( Eigen )
+find_package( lwtnn )
 find_package( ROOT COMPONENTS TMVA Core Tree MathCore Hist RIO pthread
     MathMore Minuit Minuit2 Matrix Physics HistPainter Rint RooFitCore RooFit )
 
@@ -86,11 +87,13 @@ atlas_add_library(
     src/parse_json.cxx src/Stack.cxx src/RNNIPTag.cxx
 
     PUBLIC_HEADERS JetTagTools
-    INCLUDE_DIRS ${ROOT_INCLUDE_DIRS} ${CLHEP_INCLUDE_DIRS} ${EIGEN_INCLUDE_DIRS}
+    INCLUDE_DIRS ${ROOT_INCLUDE_DIRS} ${CLHEP_INCLUDE_DIRS}
+    ${EIGEN_INCLUDE_DIRS} ${LWTNN_INCLUDE_DIRS}
     PRIVATE_INCLUDE_DIRS ${Boost_INCLUDE_DIRS} ${FASTJET_INCLUDE_DIRS}
     DEFINITIONS ${CLHEP_DEFINITIONS}
 
     LINK_LIBRARIES ${ROOT_LIBRARIES} ${CLHEP_LIBRARIES} ${EIGEN_LIBRARIES}
+    ${LWTNN_LIBRARIES}
     AsgTools AthenaBaseComps SGTools GeoPrimitives xAODBTagging xAODJet
     xAODTracking GaudiKernel JetTagInfo JetSubStructureUtils TrkParameters
     JetRecLib JetSubStructureMomentToolsLib egammaMVACalibLib MVAUtils
diff --git a/PhysicsAnalysis/JetTagging/JetTagTools/JetTagTools/DL1Tag.h b/PhysicsAnalysis/JetTagging/JetTagTools/JetTagTools/DL1Tag.h
index 20dccc1eae9851fb8e6e66a466b289c774bfd564..68fc3efe827b9d678751c0333c2d3ace48522daf 100644
--- a/PhysicsAnalysis/JetTagging/JetTagTools/JetTagTools/DL1Tag.h
+++ b/PhysicsAnalysis/JetTagging/JetTagTools/JetTagTools/DL1Tag.h
@@ -15,7 +15,7 @@
     @authors Dan Guest, Luke de Oliveira, Marie Lanfermann
 ********************************************************/
 #include "AthenaBaseComps/AthAlgTool.h"
-#include "JetTagTools/NNLayerConfig.h"
+#include "lwtnn/lightweight_network_config.hh"
 #include "JetTagTools/IMultivariateJetTagger.h"
 
 #include <vector>
diff --git a/PhysicsAnalysis/JetTagging/JetTagTools/JetTagTools/LightweightNeuralNetwork.h b/PhysicsAnalysis/JetTagging/JetTagTools/JetTagTools/LightweightNeuralNetwork.h
deleted file mode 100644
index 384dc35729ce75692772e63bdd433ab66117943b..0000000000000000000000000000000000000000
--- a/PhysicsAnalysis/JetTagging/JetTagTools/JetTagTools/LightweightNeuralNetwork.h
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
-  Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
-*/
-
-// WARNING: this code was copied automatically from
-// https://github.com/lwtnn/lwtnn.git (rev v2.0)
-// Please don't edit it! To get the latest version, run
-// > ./update-lwtnn.sh
-// from JetTagTools/share
-
-#ifndef LIGHTWEIGHT_NEURAL_NETWORK_HH
-#define LIGHTWEIGHT_NEURAL_NETWORK_HH
-
-// Lightweight Neural Networks
-//
-// This is a simple NN implementation, designed to be lightweight in
-// terms of both size and dependencies. For sanity we use Eigen, but
-// otherwise this aims to be a minimal NN class which is fully
-// configurable at runtime.
-//
-// The classes defined here are the high level wrappers: they don't
-// directly include any Eigen code (to speed compilation of algorithms
-// that use them), and they store data in STL objects.
-//
-// Authors: Dan Guest <dguest@cern.ch>
-//          Michael Kagan <mkagan@cern.ch>
-//          Michela Paganini <micky.91@hotmail.com>
-
-#include "NNLayerConfig.h"
-
-namespace lwt {
-
-  class Stack;
-  class RecurrentStack;
-  class InputPreprocessor;
-  class InputVectorPreprocessor;
-
-  // use a normal map externally, since these are more common in user
-  // code.
-  // TODO: is it worth changing to unordered_map?
-  typedef std::map<std::string, double> ValueMap;
-  typedef std::vector<std::pair<std::string, double> > ValueVector;
-  typedef std::map<std::string, std::vector<double> > VectorMap;
-
-  // ______________________________________________________________________
-  // high-level wrappers
-
-  // feed-forward variant
-  class LightweightNeuralNetwork
-  {
-  public:
-    LightweightNeuralNetwork(const std::vector<Input>& inputs,
-                             const std::vector<LayerConfig>& layers,
-                             const std::vector<std::string>& outputs);
-    ~LightweightNeuralNetwork();
-    // disable copying until we need it...
-    LightweightNeuralNetwork(LightweightNeuralNetwork&) = delete;
-    LightweightNeuralNetwork& operator=(LightweightNeuralNetwork&) = delete;
-
-    // use a normal map externally, since these are more common in
-    // user code.
-    // TODO: is it worth changing to unordered_map?
-    ValueMap compute(const ValueMap&) const;
-
-  private:
-    // use the Stack class above as the computational core
-    Stack* m_stack;
-    InputPreprocessor* m_preproc;
-
-    // output labels
-    std::vector<std::string> m_outputs;
-
-  };
-
-  // recurrent version
-  class LightweightRNN
-  {
-  public:
-    LightweightRNN(const std::vector<Input>& inputs,
-                   const std::vector<LayerConfig>& layers,
-                   const std::vector<std::string>& outputs);
-    ~LightweightRNN();
-    LightweightRNN(LightweightRNN&) = delete;
-    LightweightRNN& operator=(LightweightRNN&) = delete;
-
-    ValueMap reduce(const std::vector<ValueMap>&) const;
-    ValueMap reduce(const VectorMap&) const;
-  private:
-    RecurrentStack* m_stack;
-    InputPreprocessor* m_preproc;
-    InputVectorPreprocessor* m_vec_preproc;
-    std::vector<std::string> m_outputs;
-    size_t m_n_inputs;
-  };
-
-}
-#endif
diff --git a/PhysicsAnalysis/JetTagging/JetTagTools/JetTagTools/NNLayerConfig.h b/PhysicsAnalysis/JetTagging/JetTagTools/JetTagTools/NNLayerConfig.h
deleted file mode 100644
index 9cc599e7d81990ecbb1daaba6ffc3111b014ed31..0000000000000000000000000000000000000000
--- a/PhysicsAnalysis/JetTagging/JetTagTools/JetTagTools/NNLayerConfig.h
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
-  Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
-*/
-
-// WARNING: this code was copied automatically from
-// https://github.com/lwtnn/lwtnn.git (rev v2.0)
-// Please don't edit it! To get the latest version, run
-// > ./update-lwtnn.sh
-// from JetTagTools/share
-
-#ifndef NN_LAYER_CONFIG_HH
-#define NN_LAYER_CONFIG_HH
-
-// Layer Configuration for Lightweight Tagger
-//
-// The structures below are used to initialize
-// `LightweightNeuralNetwork` and the simpler `Stack`.
-//
-// Author: Dan Guest <dguest@cern.ch>
-
-#include <vector>
-#include <string>
-#include <map>
-
-namespace lwt {
-  enum class Activation {NONE, LINEAR, SIGMOID, RECTIFIED, SOFTMAX, TANH,
-      HARD_SIGMOID};
-  enum class Architecture {NONE, DENSE, NORMALIZATION, MAXOUT, HIGHWAY, 
-      LSTM, GRU, EMBEDDING};
-  // components (for LSTM, etc)
-  enum class Component {
-    I, O, C, F,                 // LSTM
-      Z, R, H,                  // GRU
-      T, CARRY};                // Highway
-
-  // structure for embedding layers
-  struct EmbeddingConfig
-  {
-    std::vector<double> weights;
-    int index;
-    int n_out;
-  };
-
-  // main layer configuration
-  struct LayerConfig
-  {
-    // dense layer info
-    std::vector<double> weights;
-    std::vector<double> bias;
-    std::vector<double> U;      // TODO: what is this thing called in LSTMs?
-    Activation activation;
-    Activation inner_activation; // for LSTMs and GRUs
-
-    // additional info for sublayers
-    std::vector<LayerConfig> sublayers;
-    std::map<Component, LayerConfig> components;
-    std::vector<EmbeddingConfig> embedding;
-
-    // arch flag
-    Architecture architecture;
-  };
-
-  struct Input
-  {
-    std::string name;
-    double offset;
-    double scale;
-  };
-}
-
-#endif
diff --git a/PhysicsAnalysis/JetTagging/JetTagTools/JetTagTools/Stack.h b/PhysicsAnalysis/JetTagging/JetTagTools/JetTagTools/Stack.h
deleted file mode 100644
index 0b1b56889e561edc43f18874d7a373367a056b02..0000000000000000000000000000000000000000
--- a/PhysicsAnalysis/JetTagging/JetTagTools/JetTagTools/Stack.h
+++ /dev/null
@@ -1,374 +0,0 @@
-/*
-  Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
-*/
-
-// WARNING: this code was copied automatically from
-// https://github.com/lwtnn/lwtnn.git (rev v2.0)
-// Please don't edit it! To get the latest version, run
-// > ./update-lwtnn.sh
-// from JetTagTools/share
-
-#ifndef STACK_HH
-#define STACK_HH
-
-// Stack classes
-//
-// These are the low-level classes that implement feed-forward and
-// recurrent neural networks. All the Eigen-dependent code in this
-// library should live in this file.
-//
-// To keep the Eigen code out of the high-level interface, the STL ->
-// Eigen ``preprocessor'' classes are also defined here.
-//
-// The ordering of classes is as follows:
-//  - Feed-forward Stack class
-//  - Feed-forward Layer classes
-//  - RecurrentStack class
-//  - Recurrent layers
-//  - Activation functions
-//  - Various utility functions
-//  - Preprocessor classes
-
-
-#include "Exceptions.h"
-#include "NNLayerConfig.h"
-
-#include <Eigen/Dense>
-
-#include <vector>
-#include <map>
-#include <functional>
-
-namespace lwt {
-
-  using Eigen::VectorXd;
-  using Eigen::MatrixXd;
-
-  class ILayer;
-  class IRecurrentLayer;
-
-  // use a normal map externally, since these are more common in user
-  // code.  TODO: is it worth changing to unordered_map?
-  typedef std::map<std::string, double> ValueMap;
-  typedef std::vector<std::pair<std::string, double> > ValueVector;
-  typedef std::map<std::string, std::vector<double> > VectorMap;
-
-
-  // ______________________________________________________________________
-  // Feed forward Stack class
-
-  class Stack
-  {
-  public:
-    // constructor for dummy net
-    Stack();
-    // constructor for real net
-    Stack(size_t n_inputs, const std::vector<LayerConfig>& layers,
-          size_t skip_layers = 0);
-    ~Stack();
-
-    // make non-copyable for now
-    Stack(Stack&) = delete;
-    Stack& operator=(Stack&) = delete;
-
-    VectorXd compute(VectorXd) const;
-    size_t n_outputs() const;
-
-  private:
-    // returns the size of the next layer
-    size_t add_layers(size_t n_inputs, const LayerConfig&);
-    size_t add_dense_layers(size_t n_inputs, const LayerConfig&);
-    size_t add_normalization_layers(size_t n_inputs, const LayerConfig&);
-    size_t add_highway_layers(size_t n_inputs, const LayerConfig&);
-    size_t add_maxout_layers(size_t n_inputs, const LayerConfig&);
-    std::vector<ILayer*> m_layers;
-    size_t m_n_outputs;
-  };
-
-  // _______________________________________________________________________
-  // Feed-forward layers
-
-  class ILayer
-  {
-  public:
-    virtual ~ILayer() {}
-    virtual VectorXd compute(const VectorXd&) const = 0;
-  };
-
-  class DummyLayer: public ILayer
-  {
-  public:
-    virtual VectorXd compute(const VectorXd&) const;
-  };
-
-  class UnaryActivationLayer: public ILayer
-  {
-  public:
-    UnaryActivationLayer(Activation);
-    virtual VectorXd compute(const VectorXd&) const;
-  private:
-    std::function<double(double)> m_func;
-  };
-
-  class SoftmaxLayer: public ILayer
-  {
-  public:
-    virtual VectorXd compute(const VectorXd&) const;
-  };
-
-  class BiasLayer: public ILayer
-  {
-  public:
-    BiasLayer(const VectorXd& bias);
-    BiasLayer(const std::vector<double>& bias);
-    virtual VectorXd compute(const VectorXd&) const;
-  private:
-    VectorXd m_bias;
-  };
-
-  class MatrixLayer: public ILayer
-  {
-  public:
-    MatrixLayer(const MatrixXd& matrix);
-    virtual VectorXd compute(const VectorXd&) const;
-  private:
-    MatrixXd m_matrix;
-  };
-
-  class MaxoutLayer: public ILayer
-  {
-  public:
-    typedef std::pair<MatrixXd, VectorXd> InitUnit;
-    MaxoutLayer(const std::vector<InitUnit>& maxout_tensor);
-    virtual VectorXd compute(const VectorXd&) const;
-  private:
-    std::vector<MatrixXd> m_matrices;
-    MatrixXd m_bias;
-  };
-
-
-  /// Normalization layer ///
-  /// https://arxiv.org/abs/1502.03167 ///
-  class NormalizationLayer : public ILayer
-  {
-
-  public:
-    NormalizationLayer(const VectorXd& W,const VectorXd& b);
-    virtual VectorXd compute(const VectorXd&) const;
-
-  private:
-    VectorXd _W;
-    VectorXd _b;
-
-  };
-
-  //http://arxiv.org/pdf/1505.00387v2.pdf
-  class HighwayLayer: public ILayer
-  {
-  public:
-    HighwayLayer(const MatrixXd& W,
-                 const VectorXd& b,
-                 const MatrixXd& W_carry,
-                 const VectorXd& b_carry,
-                 Activation activation);
-    virtual VectorXd compute(const VectorXd&) const;
-  private:
-    MatrixXd m_w_t;
-    VectorXd m_b_t;
-    MatrixXd m_w_c;
-    VectorXd m_b_c;
-    std::function<double(double)> m_act;
-  };
-
-  // ______________________________________________________________________
-  // Recurrent Stack
-
-  class RecurrentStack
-  {
-  public:
-    RecurrentStack(size_t n_inputs, const std::vector<LayerConfig>& layers);
-    ~RecurrentStack();
-    RecurrentStack(RecurrentStack&) = delete;
-    RecurrentStack& operator=(RecurrentStack&) = delete;
-    VectorXd reduce(MatrixXd inputs) const;
-    size_t n_outputs() const;
-  private:
-    std::vector<IRecurrentLayer*> m_layers;
-    size_t add_lstm_layers(size_t n_inputs, const LayerConfig&);
-    size_t add_gru_layers(size_t n_inputs, const LayerConfig&);
-    size_t add_embedding_layers(size_t n_inputs, const LayerConfig&);
-    Stack* m_stack;
-  };
-
-
-  // __________________________________________________________________
-  // Recurrent layers
-
-  class IRecurrentLayer
-  {
-  public:
-    virtual ~IRecurrentLayer() {}
-    virtual MatrixXd scan( const MatrixXd&) = 0;
-  };
-
-  class EmbeddingLayer : public IRecurrentLayer
-  {
-  public:
-    EmbeddingLayer(int var_row_index, MatrixXd W);
-    virtual ~EmbeddingLayer() {};
-    virtual MatrixXd scan( const MatrixXd&);
-
-  private:
-    int m_var_row_index;
-    MatrixXd m_W;
-  };
-
-  /// long short term memory ///
-  class LSTMLayer : public IRecurrentLayer
-  {
-  public:
-    LSTMLayer(Activation activation, Activation inner_activation,
-        MatrixXd W_i, MatrixXd U_i, VectorXd b_i,
-        MatrixXd W_f, MatrixXd U_f, VectorXd b_f,
-        MatrixXd W_o, MatrixXd U_o, VectorXd b_o,
-        MatrixXd W_c, MatrixXd U_c, VectorXd b_c,
-        bool return_sequences = true);
-
-    virtual ~LSTMLayer() {};
-    virtual VectorXd step( const VectorXd&);
-    virtual MatrixXd scan( const MatrixXd&);
-
-  private:
-    std::function<double(double)> m_activation_fun;
-    std::function<double(double)> m_inner_activation_fun;
-
-    MatrixXd m_W_i;
-    MatrixXd m_U_i;
-    VectorXd m_b_i;
-
-    MatrixXd m_W_f;
-    MatrixXd m_U_f;
-    VectorXd m_b_f;
-
-    MatrixXd m_W_o;
-    MatrixXd m_U_o;
-    VectorXd m_b_o;
-
-    MatrixXd m_W_c;
-    MatrixXd m_U_c;
-    VectorXd m_b_c;
-
-    //states
-    MatrixXd m_C_t;
-    MatrixXd m_h_t;
-    int m_time;
-
-    int m_n_outputs;
-
-    bool m_return_sequences;
-  };
-
-  /// gated recurrent unit ///
-  class GRULayer : public IRecurrentLayer
-  {
-  public:
-    GRULayer(Activation activation, Activation inner_activation,
-        MatrixXd W_z, MatrixXd U_z, VectorXd b_z,
-        MatrixXd W_r, MatrixXd U_r, VectorXd b_r,
-        MatrixXd W_h, MatrixXd U_h, VectorXd b_h,
-        bool return_sequences = true);
-
-    virtual ~GRULayer() {};
-    virtual VectorXd step( const VectorXd&);
-    virtual MatrixXd scan( const MatrixXd&);
-
-  private:
-    std::function<double(double)> m_activation_fun;
-    std::function<double(double)> m_inner_activation_fun;
-
-    MatrixXd m_W_z;
-    MatrixXd m_U_z;
-    VectorXd m_b_z;
-
-    MatrixXd m_W_r;
-    MatrixXd m_U_r;
-    VectorXd m_b_r;
-
-    MatrixXd m_W_h;
-    MatrixXd m_U_h;
-    VectorXd m_b_h;
-
-    //states
-    MatrixXd m_h_t;
-    int m_time;
-
-    int m_n_outputs;
-
-    bool m_return_sequences;
-  };
-
-  // ______________________________________________________________________
-  // Activation functions
-
-  // note that others are supported but are too simple to
-  // require a special function
-  double nn_sigmoid( double x );
-  double nn_hard_sigmoid( double x );
-  double nn_tanh( double x );
-  double nn_relu( double x );
-  std::function<double(double)> get_activation(lwt::Activation);
-
-  // WARNING: you own this pointer! Only call when assigning to member data!
-  ILayer* get_raw_activation_layer(Activation);
-
-  // ______________________________________________________________________
-  // utility functions
-
-  // functions to build up basic units from vectors
-  MatrixXd build_matrix(const std::vector<double>& weights, size_t n_inputs);
-  VectorXd build_vector(const std::vector<double>& bias);
-
-  // consistency checks
-  void throw_if_not_maxout(const LayerConfig& layer);
-  void throw_if_not_dense(const LayerConfig& layer);
-  void throw_if_not_normalization(const LayerConfig& layer);
-
-  // LSTM component for convenience in some layers
-  struct DenseComponents
-  {
-    Eigen::MatrixXd W;
-    Eigen::MatrixXd U;
-    Eigen::VectorXd b;
-  };
-  DenseComponents get_component(const lwt::LayerConfig& layer, size_t n_in);
-
-  // ______________________________________________________________________
-  // input preprocessor (handles normalization and packing into Eigen)
-
-  class InputPreprocessor
-  {
-  public:
-    InputPreprocessor(const std::vector<Input>& inputs);
-    VectorXd operator()(const ValueMap&) const;
-  private:
-    // input transformations
-    VectorXd m_offsets;
-    VectorXd m_scales;
-    std::vector<std::string> m_names;
-  };
-
-  class InputVectorPreprocessor
-  {
-  public:
-    InputVectorPreprocessor(const std::vector<Input>& inputs);
-    MatrixXd operator()(const VectorMap&) const;
-  private:
-    // input transformations
-    VectorXd m_offsets;
-    VectorXd m_scales;
-    std::vector<std::string> m_names;
-  };
-
-}
-
-#endif // STACK_HH
diff --git a/PhysicsAnalysis/JetTagging/JetTagTools/JetTagTools/parse_json.h b/PhysicsAnalysis/JetTagging/JetTagTools/JetTagTools/parse_json.h
deleted file mode 100644
index 6a6fde505ebccd286d3a8f215b2c0faca28ce661..0000000000000000000000000000000000000000
--- a/PhysicsAnalysis/JetTagging/JetTagTools/JetTagTools/parse_json.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
-  Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
-*/
-
-// WARNING: this code was copied automatically from
-// https://github.com/lwtnn/lwtnn.git (rev v2.0)
-// Please don't edit it! To get the latest version, run
-// > ./update-lwtnn.sh
-// from JetTagTools/share
-
-#ifndef PARSE_JSON_HH
-#define PARSE_JSON_HH
-
-#include "NNLayerConfig.h"
-
-#include <istream>
-#include <map>
-
-namespace lwt {
-  struct JSONConfig
-  {
-    std::vector<LayerConfig> layers;
-    std::vector<Input> inputs;
-    std::vector<std::string> outputs;
-    std::map<std::string, double> defaults;
-    std::map<std::string, std::string> miscellaneous;
-  };
-  JSONConfig parse_json(std::istream& json);
-}
-
-
-#endif
diff --git a/PhysicsAnalysis/JetTagging/JetTagTools/src/DL1Tag.cxx b/PhysicsAnalysis/JetTagging/JetTagTools/src/DL1Tag.cxx
index b6e88c2092fa3b720db1a7a186a085a63f374f03..4a0b5ddda491ae28e74fc3a9b37bb6a426ecfb43 100644
--- a/PhysicsAnalysis/JetTagging/JetTagTools/src/DL1Tag.cxx
+++ b/PhysicsAnalysis/JetTagging/JetTagTools/src/DL1Tag.cxx
@@ -22,9 +22,10 @@
 
 #include "JetTagCalibration/CalibrationBroker.h"
 #include "JetTagTools/DL1Tag.h"
-#include "JetTagTools/LightweightNeuralNetwork.h"
-#include "JetTagTools/parse_json.h"
-#include "JetTagTools/Exceptions.h"
+
+#include "lwtnn/LightweightNeuralNetwork.hh"
+#include "lwtnn/parse_json.hh"
+#include "lwtnn/Exceptions.hh"
 
 #include "xAODBTagging/BTagging.h"
 #include "xAODJet/Jet.h"
diff --git a/PhysicsAnalysis/JetTagging/JetTagTools/src/LightweightNeuralNetwork.cxx b/PhysicsAnalysis/JetTagging/JetTagTools/src/LightweightNeuralNetwork.cxx
deleted file mode 100644
index 9fe83d0e318e366b33a8ea73f47de147944964cc..0000000000000000000000000000000000000000
--- a/PhysicsAnalysis/JetTagging/JetTagTools/src/LightweightNeuralNetwork.cxx
+++ /dev/null
@@ -1,119 +0,0 @@
-/*
-  Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
-*/
-
-// WARNING: this code was copied automatically from
-// https://github.com/lwtnn/lwtnn.git (rev v2.0)
-// Please don't edit it! To get the latest version, run
-// > ./update-lwtnn.sh
-// from JetTagTools/share
-
-#include "JetTagTools/LightweightNeuralNetwork.h"
-#include "JetTagTools/Stack.h"
-#include <Eigen/Dense>
-
-#include <set>
-
-// internal utility functions
-namespace {
-  using namespace Eigen;
-  using namespace lwt;
-}
-namespace lwt {
-
-  // ______________________________________________________________________
-  // LightweightNeuralNetwork HL wrapper
-  LightweightNeuralNetwork::LightweightNeuralNetwork(
-    const std::vector<Input>& inputs,
-    const std::vector<LayerConfig>& layers,
-    const std::vector<std::string>& outputs):
-    m_stack(new Stack(inputs.size(), layers)),
-    m_preproc(new InputPreprocessor(inputs)),
-    m_outputs(outputs.begin(), outputs.end())
-  {
-    if (m_outputs.size() != m_stack->n_outputs()) {
-      std::string problem = "internal stack has " +
-        std::to_string(m_stack->n_outputs()) + " outputs, but " +
-        std::to_string(m_outputs.size()) + " were given";
-      throw NNConfigurationException(problem);
-    }
-  }
-
-  LightweightNeuralNetwork::~LightweightNeuralNetwork() {
-    delete m_stack;
-    m_stack = 0;
-    delete m_preproc;
-    m_preproc = 0;
-  }
-
-  lwt::ValueMap
-  LightweightNeuralNetwork::compute(const lwt::ValueMap& in) const {
-
-    // compute outputs
-    const auto& preproc = *m_preproc;
-    auto outvec = m_stack->compute(preproc(in));
-    assert(outvec.rows() > 0);
-    auto out_size = static_cast<size_t>(outvec.rows());
-    assert(out_size == m_outputs.size());
-
-    // build and return output map
-    lwt::ValueMap out_map;
-    for (size_t out_n = 0; out_n < out_size; out_n++) {
-      out_map.emplace(m_outputs.at(out_n), outvec(out_n));
-    }
-    return out_map;
-  }
-
-  // ______________________________________________________________________
-  // LightweightRNN
-
-  LightweightRNN::LightweightRNN(const std::vector<Input>& inputs,
-                                 const std::vector<LayerConfig>& layers,
-                                 const std::vector<std::string>& outputs):
-    m_stack(new RecurrentStack(inputs.size(), layers)),
-    m_preproc(new InputPreprocessor(inputs)),
-    m_vec_preproc(new InputVectorPreprocessor(inputs)),
-    m_outputs(outputs.begin(), outputs.end()),
-    m_n_inputs(inputs.size())
-  {
-    if (m_outputs.size() != m_stack->n_outputs()) {
-      throw NNConfigurationException(
-        "Mismatch between NN output dimensions and output labels");
-    }
-  }
-  LightweightRNN::~LightweightRNN() {
-    delete m_stack;
-    delete m_preproc;
-    delete m_vec_preproc;
-  }
-
-  ValueMap LightweightRNN::reduce(const std::vector<ValueMap>& in) const {
-    const auto& preproc = *m_preproc;
-    MatrixXd inputs(m_n_inputs, in.size());
-    for (size_t iii = 0; iii < in.size(); iii++) {
-      inputs.col(iii) = preproc(in.at(iii));
-    }
-    auto outvec = m_stack->reduce(inputs);
-    ValueMap out;
-    const auto n_rows = static_cast<size_t>(outvec.rows());
-    for (size_t iii = 0; iii < n_rows; iii++) {
-      out.emplace(m_outputs.at(iii), outvec(iii));
-    }
-    return out;
-  }
-
-  // this version should be slightly faster since it only has to sort
-  // the inputs once
-  ValueMap LightweightRNN::reduce(const VectorMap& in) const {
-    const auto& preproc = *m_vec_preproc;
-    auto outvec = m_stack->reduce(preproc(in));
-    ValueMap out;
-    const auto n_rows = static_cast<size_t>(outvec.rows());
-    for (size_t iii = 0; iii < n_rows; iii++) {
-      out.emplace(m_outputs.at(iii), outvec(iii));
-    }
-    return out;
-  }
-
-
-}
diff --git a/PhysicsAnalysis/JetTagging/JetTagTools/src/RNNIPTag.cxx b/PhysicsAnalysis/JetTagging/JetTagTools/src/RNNIPTag.cxx
index 200cc9e47ac046696c2f5991769335528bf9a362..5fa0862ff8e14cb4dfb0cb806303471339e750bb 100644
--- a/PhysicsAnalysis/JetTagging/JetTagTools/src/RNNIPTag.cxx
+++ b/PhysicsAnalysis/JetTagging/JetTagTools/src/RNNIPTag.cxx
@@ -3,9 +3,10 @@
 */
 
 #include "JetTagTools/RNNIPTag.h"
-#include "JetTagTools/LightweightNeuralNetwork.h"
-#include "JetTagTools/parse_json.h"
-#include "JetTagTools/Exceptions.h"
+
+#include "lwtnn/LightweightNeuralNetwork.hh"
+#include "lwtnn/Exceptions.hh"
+#include "lwtnn/parse_json.hh"
 
 #include "JetTagTools/TrackSelector.h"
 #include "JetTagTools/GradedTrack.h"
diff --git a/PhysicsAnalysis/JetTagging/JetTagTools/src/Stack.cxx b/PhysicsAnalysis/JetTagging/JetTagTools/src/Stack.cxx
deleted file mode 100644
index 1875492b9d99dc84bf1cdd64ed80972399fc8461..0000000000000000000000000000000000000000
--- a/PhysicsAnalysis/JetTagging/JetTagTools/src/Stack.cxx
+++ /dev/null
@@ -1,756 +0,0 @@
-/*
-  Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
-*/
-
-// WARNING: this code was copied automatically from
-// https://github.com/lwtnn/lwtnn.git (rev v2.0)
-// Please don't edit it! To get the latest version, run
-// > ./update-lwtnn.sh
-// from JetTagTools/share
-
-#include "JetTagTools/Stack.h"
-#include <Eigen/Dense>
-
-#include <set>
-
-// internal utility functions
-namespace {
-  using namespace Eigen;
-  using namespace lwt;
-}
-namespace lwt {
-
-  // ______________________________________________________________________
-  // Feed forward Stack class
-
-  // dummy construction routine
-  Stack::Stack() {
-    m_layers.push_back(new DummyLayer);
-    m_layers.push_back(new UnaryActivationLayer(Activation::SIGMOID));
-    m_layers.push_back(new BiasLayer(std::vector<double>{1, 1, 1, 1}));
-    MatrixXd mat(4, 4);
-    mat <<
-      0, 0, 0, 1,
-      0, 0, 1, 0,
-      0, 1, 0, 0,
-      1, 0, 0, 0;
-    m_layers.push_back(new MatrixLayer(mat));
-    m_n_outputs = 4;
-  }
-
-  // construct from LayerConfig
-  Stack::Stack(size_t n_inputs, const std::vector<LayerConfig>& layers,
-               size_t skip) {
-    for (size_t nnn = skip; nnn < layers.size(); nnn++) {
-      n_inputs = add_layers(n_inputs, layers.at(nnn));
-    }
-    // the final assigned n_inputs is the number of output nodes
-    m_n_outputs = n_inputs;
-  }
-
-  Stack::~Stack() {
-    for (auto& layer: m_layers) {
-      delete layer;
-      layer = 0;
-    }
-  }
-  VectorXd Stack::compute(VectorXd in) const {
-    for (const auto& layer: m_layers) {
-      in = layer->compute(in);
-    }
-    return in;
-  }
-  size_t Stack::n_outputs() const {
-    return m_n_outputs;
-  }
-
-
-  // Private Stack methods to add various types of layers
-  //
-  // top level add_layers method. This delegates to the other methods
-  // below
-  size_t Stack::add_layers(size_t n_inputs, const LayerConfig& layer) {
-    if (layer.architecture == Architecture::DENSE) {
-      return add_dense_layers(n_inputs, layer);
-    } else if (layer.architecture == Architecture::NORMALIZATION){
-      return add_normalization_layers(n_inputs, layer);
-    } else if (layer.architecture == Architecture::HIGHWAY){
-      return add_highway_layers(n_inputs, layer);
-    } else if (layer.architecture == Architecture::MAXOUT) {
-      return add_maxout_layers(n_inputs, layer);
-    }
-    throw NNConfigurationException("unknown architecture");
-  }
-
-  size_t Stack::add_dense_layers(size_t n_inputs, const LayerConfig& layer) {
-    assert(layer.architecture == Architecture::DENSE);
-    throw_if_not_dense(layer);
-
-    size_t n_outputs = n_inputs;
-
-    // add matrix layer
-    if (layer.weights.size() > 0) {
-      MatrixXd matrix = build_matrix(layer.weights, n_inputs);
-      n_outputs = matrix.rows();
-      m_layers.push_back(new MatrixLayer(matrix));
-    };
-
-    // add bias layer
-    if (layer.bias.size() > 0) {
-      if (n_outputs != layer.bias.size() ) {
-        std::string problem = "tried to add a bias layer with " +
-          std::to_string(layer.bias.size()) + " entries, previous layer"
-          " had " + std::to_string(n_outputs) + " outputs";
-        throw NNConfigurationException(problem);
-      }
-      m_layers.push_back(new BiasLayer(layer.bias));
-    }
-
-    // add activation layer
-    if (layer.activation != Activation::LINEAR) {
-      m_layers.push_back(get_raw_activation_layer(layer.activation));
-    }
-
-    return n_outputs;
-  }
-
-  size_t Stack::add_normalization_layers(size_t n_inputs, const LayerConfig& layer) {
-    assert(layer.architecture == Architecture::NORMALIZATION);
-    throw_if_not_normalization(layer);
-
-    // Do some checks
-    if ( layer.weights.size() < 1 || layer.bias.size() < 1 ) {
-      std::string problem = "Either weights or bias layer size is < 1";
-      throw NNConfigurationException(problem);
-    };
-    if ( layer.weights.size() != layer.bias.size() ) {
-      std::string problem = "weights and bias layer are not equal in size!";
-      throw NNConfigurationException(problem);
-    };
-    VectorXd v_weights = build_vector(layer.weights);
-    VectorXd v_bias = build_vector(layer.bias);
-
-    m_layers.push_back(
-      new NormalizationLayer(v_weights, v_bias));
-    return n_inputs;
-  }
-
-
-  size_t Stack::add_highway_layers(size_t n_inputs, const LayerConfig& layer) {
-    auto& comps = layer.components;
-    const auto& t = get_component(comps.at(Component::T), n_inputs);
-    const auto& c = get_component(comps.at(Component::CARRY), n_inputs);
-
-    m_layers.push_back(
-      new HighwayLayer(t.W, t.b, c.W, c.b, layer.activation));
-    return n_inputs;
-  }
-
-
-  size_t Stack::add_maxout_layers(size_t n_inputs, const LayerConfig& layer) {
-    assert(layer.architecture == Architecture::MAXOUT);
-    throw_if_not_maxout(layer);
-    std::vector<MaxoutLayer::InitUnit> matrices;
-    std::set<size_t> n_outputs;
-    for (const auto& sublayer: layer.sublayers) {
-      MatrixXd matrix = build_matrix(sublayer.weights, n_inputs);
-      VectorXd bias = build_vector(sublayer.bias);
-      n_outputs.insert(matrix.rows());
-      matrices.push_back(std::make_pair(matrix, bias));
-    }
-    if (n_outputs.size() == 0) {
-      throw NNConfigurationException("tried to build maxout withoutweights!");
-    }
-    else if (n_outputs.size() != 1) {
-      throw NNConfigurationException("uneven matrices for maxout");
-    }
-    m_layers.push_back(new MaxoutLayer(matrices));
-    return *n_outputs.begin();
-  }
-
-
-  // _______________________________________________________________________
-  // Feed-forward layers
-
-  VectorXd DummyLayer::compute(const VectorXd& in) const {
-    return in;
-  }
-
-  // activation functions
-  UnaryActivationLayer::UnaryActivationLayer(Activation act):
-    m_func(get_activation(act))
-  {
-  }
-  VectorXd UnaryActivationLayer::compute(const VectorXd& in) const {
-    return in.unaryExpr(m_func);
-  }
-
-  VectorXd SoftmaxLayer::compute(const VectorXd& in) const {
-    size_t n_elements = in.rows();
-    VectorXd exp(n_elements);
-    for (size_t iii = 0; iii < n_elements; iii++) {
-      exp(iii) = std::exp(in(iii));
-    }
-    double sum_exp = exp.sum();
-    return exp / sum_exp;
-  }
-
-  // bias layer
-  BiasLayer::BiasLayer(const VectorXd& bias): m_bias(bias)
-  {
-  }
-  BiasLayer::BiasLayer(const std::vector<double>& bias):
-    m_bias(build_vector(bias))
-  {
-  }
-  VectorXd BiasLayer::compute(const VectorXd& in) const {
-    return in + m_bias;
-  }
-
-  // basic dense matrix layer
-  MatrixLayer::MatrixLayer(const MatrixXd& matrix):
-    m_matrix(matrix)
-  {
-  }
-  VectorXd MatrixLayer::compute(const VectorXd& in) const {
-    return m_matrix * in;
-  }
-
-  // maxout layer
-  MaxoutLayer::MaxoutLayer(const std::vector<MaxoutLayer::InitUnit>& units):
-    m_bias(units.size(), units.front().first.rows())
-  {
-    int out_pos = 0;
-    for (const auto& unit: units) {
-      m_matrices.push_back(unit.first);
-      m_bias.row(out_pos) = unit.second;
-      out_pos++;
-    }
-  }
-  VectorXd MaxoutLayer::compute(const VectorXd& in) const {
-    // eigen supports tensors, but only in the experimental component
-    // for now just stick to matrix and vector classes
-    const size_t n_mat = m_matrices.size();
-    const size_t out_dim = m_matrices.front().rows();
-    MatrixXd outputs(n_mat, out_dim);
-    for (size_t mat_n = 0; mat_n < n_mat; mat_n++) {
-      outputs.row(mat_n) = m_matrices.at(mat_n) * in;
-    }
-    outputs += m_bias;
-    return outputs.colwise().maxCoeff();
-  }
-
-   // Normalization layer
-   NormalizationLayer::NormalizationLayer(const VectorXd& W,
-                                          const VectorXd& b):
-    _W(W), _b(b)
-  {
-  }
-  VectorXd NormalizationLayer::compute(const VectorXd& in) const {
-    VectorXd shift = in + _b ;
-    return _W.cwiseProduct(shift);
-  }
-
-  // highway layer
-  HighwayLayer::HighwayLayer(const MatrixXd& W,
-                             const VectorXd& b,
-                             const MatrixXd& W_carry,
-                             const VectorXd& b_carry,
-                             Activation activation):
-    m_w_t(W), m_b_t(b), m_w_c(W_carry), m_b_c(b_carry),
-    m_act(get_activation(activation))
-  {
-  }
-  VectorXd HighwayLayer::compute(const VectorXd& in) const {
-    const std::function<double(double)> sig(nn_sigmoid);
-    ArrayXd c = (m_w_c * in + m_b_c).unaryExpr(sig);
-    ArrayXd t = (m_w_t * in + m_b_t).unaryExpr(m_act);
-    return c * t + (1 - c) * in.array();
-  }
-
-  // ______________________________________________________________________
-  // Recurrent Stack
-
-  RecurrentStack::RecurrentStack(size_t n_inputs,
-                                 const std::vector<lwt::LayerConfig>& layers)
-  {
-    using namespace lwt;
-    size_t layer_n = 0;
-    const size_t n_layers = layers.size();
-    for (;layer_n < n_layers; layer_n++) {
-      auto& layer = layers.at(layer_n);
-
-      // add recurrent layers (now LSTM and GRU!)
-      if (layer.architecture == Architecture::LSTM) {
-        n_inputs = add_lstm_layers(n_inputs, layer);
-      } else if (layer.architecture == Architecture::GRU) {
-        n_inputs = add_gru_layers(n_inputs, layer);
-      } else if (layer.architecture == Architecture::EMBEDDING) {
-        n_inputs = add_embedding_layers(n_inputs, layer);
-      } else {
-        // leave this loop if we're done with the recurrent stuff
-        break;
-      }
-    }
-    // fill the remaining dense layers
-    m_stack = new Stack(n_inputs, layers, layer_n);
-  }
-  RecurrentStack::~RecurrentStack() {
-    for (auto& layer: m_layers) {
-      delete layer;
-      layer = 0;
-    }
-    delete m_stack;
-    m_stack = 0;
-  }
-  VectorXd RecurrentStack::reduce(MatrixXd in) const {
-    for (auto* layer: m_layers) {
-      in = layer->scan(in);
-    }
-    return m_stack->compute(in.col(in.cols() - 1));
-  }
-  size_t RecurrentStack::n_outputs() const {
-    return m_stack->n_outputs();
-  }
-
-  size_t RecurrentStack::add_lstm_layers(size_t n_inputs,
-                                         const LayerConfig& layer) {
-    auto& comps = layer.components;
-    const auto& i = get_component(comps.at(Component::I), n_inputs);
-    const auto& o = get_component(comps.at(Component::O), n_inputs);
-    const auto& f = get_component(comps.at(Component::F), n_inputs);
-    const auto& c = get_component(comps.at(Component::C), n_inputs);
-    m_layers.push_back(
-      new LSTMLayer(layer.activation, layer.inner_activation,
-                    i.W, i.U, i.b,
-                    f.W, f.U, f.b,
-                    o.W, o.U, o.b,
-                    c.W, c.U, c.b));
-    return o.b.rows();
-  }
-
-  size_t RecurrentStack::add_gru_layers(size_t n_inputs,
-                                         const LayerConfig& layer) {
-    auto& comps = layer.components;
-    const auto& z = get_component(comps.at(Component::Z), n_inputs);
-    const auto& r = get_component(comps.at(Component::R), n_inputs);
-    const auto& h = get_component(comps.at(Component::H), n_inputs);
-    m_layers.push_back(
-      new GRULayer(layer.activation, layer.inner_activation,
-                    z.W, z.U, z.b,
-                    r.W, r.U, r.b,
-                    h.W, h.U, h.b));
-    return h.b.rows();
-  }
-
-  size_t RecurrentStack::add_embedding_layers(size_t n_inputs,
-                                              const LayerConfig& layer) {
-    for (const auto& emb: layer.embedding) {
-      size_t n_wt = emb.weights.size();
-      size_t n_cats = n_wt / emb.n_out;
-      MatrixXd mat = build_matrix(emb.weights, n_cats);
-      m_layers.push_back(new EmbeddingLayer(emb.index, mat));
-      n_inputs += emb.n_out - 1;
-    }
-    return n_inputs;
-  }
-
-  // __________________________________________________________________
-  // Recurrent layers
-
-  EmbeddingLayer::EmbeddingLayer(int var_row_index, MatrixXd W):
-    m_var_row_index(var_row_index),
-    m_W(W)
-  {
-    if(var_row_index < 0)
-      throw NNConfigurationException(
-        "EmbeddingLayer::EmbeddingLayer - can not set var_row_index<0,"
-        " it is an index for a matrix row!");
-  }
-
-  MatrixXd EmbeddingLayer::scan( const MatrixXd& x) {
-
-    if( m_var_row_index >= x.rows() )
-      throw NNEvaluationException(
-        "EmbeddingLayer::scan - var_row_index is larger than input matrix"
-        " number of rows!");
-
-    MatrixXd embedded(m_W.rows(), x.cols());
-
-    for(int icol=0; icol<x.cols(); icol++) {
-      double vector_idx = x(m_var_row_index, icol);
-      bool is_int = std::floor(vector_idx) == vector_idx;
-      bool is_valid = (vector_idx >= 0) && (vector_idx < m_W.cols());
-      if (!is_int || !is_valid) throw NNEvaluationException(
-        "Invalid embedded index: " + std::to_string(vector_idx));
-      embedded.col(icol) = m_W.col( vector_idx );
-    }
-
-    //only embed 1 variable at a time, so this should be correct size
-    MatrixXd out(m_W.rows() + (x.rows() - 1), x.cols());
-
-    //assuming m_var_row_index is an index with first possible value of 0
-    if(m_var_row_index > 0)
-      out.topRows(m_var_row_index) = x.topRows(m_var_row_index);
-
-    out.block(m_var_row_index, 0, embedded.rows(), embedded.cols()) = embedded;
-
-    if( m_var_row_index < (x.rows()-1) )
-      out.bottomRows( x.cols() - 1 - m_var_row_index)
-        = x.bottomRows( x.cols() - 1 - m_var_row_index);
-
-    return out;
-  }
-
-
-  // LSTM layer
-  LSTMLayer::LSTMLayer(Activation activation, Activation inner_activation,
-           MatrixXd W_i, MatrixXd U_i, VectorXd b_i,
-           MatrixXd W_f, MatrixXd U_f, VectorXd b_f,
-           MatrixXd W_o, MatrixXd U_o, VectorXd b_o,
-           MatrixXd W_c, MatrixXd U_c, VectorXd b_c,
-           bool return_sequences):
-    m_W_i(W_i),
-    m_U_i(U_i),
-    m_b_i(b_i),
-    m_W_f(W_f),
-    m_U_f(U_f),
-    m_b_f(b_f),
-    m_W_o(W_o),
-    m_U_o(U_o),
-    m_b_o(b_o),
-    m_W_c(W_c),
-    m_U_c(U_c),
-    m_b_c(b_c),
-    m_time(-1),
-    m_return_sequences(return_sequences)
-  {
-    m_n_outputs = m_W_o.rows();
-
-    m_activation_fun = get_activation(activation);
-    m_inner_activation_fun = get_activation(inner_activation);
-  }
-
-  VectorXd LSTMLayer::step( const VectorXd& x_t ) {
-    // https://github.com/fchollet/keras/blob/master/keras/layers/recurrent.py#L740
-
-    if(m_time < 0)
-      throw NNEvaluationException(
-        "LSTMLayer::compute - time is less than zero!");
-
-    const auto& act_fun = m_activation_fun;
-    const auto& in_act_fun = m_inner_activation_fun;
-
-    int tm1 = std::max(0, m_time - 1);
-    VectorXd h_tm1 = m_h_t.col(tm1);
-    VectorXd C_tm1 = m_C_t.col(tm1);
-
-    VectorXd i  =  (m_W_i*x_t + m_b_i + m_U_i*h_tm1).unaryExpr(in_act_fun);
-    VectorXd f  =  (m_W_f*x_t + m_b_f + m_U_f*h_tm1).unaryExpr(in_act_fun);
-    VectorXd o  =  (m_W_o*x_t + m_b_o + m_U_o*h_tm1).unaryExpr(in_act_fun);
-    VectorXd ct =  (m_W_c*x_t + m_b_c + m_U_c*h_tm1).unaryExpr(act_fun);
-
-    m_C_t.col(m_time) = f.cwiseProduct(C_tm1) + i.cwiseProduct(ct);
-    m_h_t.col(m_time) = o.cwiseProduct( m_C_t.col(m_time).unaryExpr(act_fun) );
-
-    return VectorXd( m_h_t.col(m_time) );
-  }
-
-  MatrixXd LSTMLayer::scan( const MatrixXd& x ){
-
-    m_C_t.resize(m_n_outputs, x.cols());
-    m_C_t.setZero();
-    m_h_t.resize(m_n_outputs, x.cols());
-    m_h_t.setZero();
-    m_time = -1;
-
-
-    for(m_time=0; m_time < x.cols(); m_time++) {
-      this->step( x.col( m_time ) );
-    }
-
-    return m_return_sequences ? m_h_t : m_h_t.col(m_h_t.cols() - 1);
-  }
-
-
-  // GRU layer
-  GRULayer::GRULayer(Activation activation, Activation inner_activation,
-           MatrixXd W_z, MatrixXd U_z, VectorXd b_z,
-           MatrixXd W_r, MatrixXd U_r, VectorXd b_r,
-           MatrixXd W_h, MatrixXd U_h, VectorXd b_h,
-           bool return_sequences):
-    m_W_z(W_z),
-    m_U_z(U_z),
-    m_b_z(b_z),
-    m_W_r(W_r),
-    m_U_r(U_r),
-    m_b_r(b_r),
-    m_W_h(W_h),
-    m_U_h(U_h),
-    m_b_h(b_h),
-    m_time(-1),
-    m_return_sequences(return_sequences)
-  {
-    m_n_outputs = m_W_h.rows();
-
-    m_activation_fun = get_activation(activation);
-    m_inner_activation_fun = get_activation(inner_activation);
-  }
-
-  VectorXd GRULayer::step( const VectorXd& x_t ) {
-    // https://github.com/fchollet/keras/blob/master/keras/layers/recurrent.py#L547
-
-    if(m_time < 0)
-      throw NNEvaluationException(
-        "LSTMLayer::compute - time is less than zero!");
-
-    const auto& act_fun = m_activation_fun;
-    const auto& in_act_fun = m_inner_activation_fun;
-
-    int tm1 = std::max(0, m_time - 1);
-    VectorXd h_tm1 = m_h_t.col(tm1);
-    //VectorXd C_tm1 = m_C_t.col(tm1);
-    VectorXd z  = (m_W_z*x_t + m_b_z + m_U_z*h_tm1).unaryExpr(in_act_fun);
-    VectorXd r  = (m_W_r*x_t + m_b_r + m_U_r*h_tm1).unaryExpr(in_act_fun);
-    VectorXd hh = (m_W_h*x_t + m_b_h + m_U_h*(r.cwiseProduct(h_tm1))).unaryExpr(act_fun); 
-    m_h_t.col(m_time)  = z.cwiseProduct(h_tm1) + (VectorXd::Ones(z.size()) - z).cwiseProduct(hh);
-
-    return VectorXd( m_h_t.col(m_time) );
-  }
-
-  MatrixXd GRULayer::scan( const MatrixXd& x ){
-
-    m_h_t.resize(m_n_outputs, x.cols());
-    m_h_t.setZero();
-    m_time = -1;
-
-    for(m_time=0; m_time < x.cols(); m_time++){
-  this->step( x.col( m_time ) );
-      }
-
-    return m_return_sequences ? m_h_t : m_h_t.col(m_h_t.cols() - 1);
-  }
-
-  // _____________________________________________________________________
-  // Activation functions
-  //
-  // There are two functions below. In most cases the activation layer
-  // can be implemented as a unary function, but in some cases
-  // (i.e. softmax) something more complicated is required.
-
-  // Note that in the first case you own this layer! It's your
-  // responsibility to delete it.
-  ILayer* get_raw_activation_layer(Activation activation) {
-    // Check for special cases. If it's not one, use
-    // UnaryActivationLayer
-    switch (activation) {
-    case Activation::SOFTMAX: return new SoftmaxLayer;
-    default: return new UnaryActivationLayer(activation);
-    }
-  }
-
-  // Most activation functions should be handled here.
-  std::function<double(double)> get_activation(lwt::Activation act) {
-    using namespace lwt;
-    switch (act) {
-    case Activation::SIGMOID: return nn_sigmoid;
-    case Activation::HARD_SIGMOID: return nn_hard_sigmoid;
-    case Activation::TANH: return nn_tanh;
-    case Activation::RECTIFIED: return nn_relu;
-    case Activation::LINEAR: return [](double x){return x;};
-    default: {
-      throw NNConfigurationException("Got undefined activation function");
-    }
-    }
-  }
-
-
-  double nn_sigmoid( double x ){
-    //github.com/Theano/Theano/blob/master/theano/tensor/nnet/sigm.py#L35
-    if (x < -30.0) return 0.0;
-    if (x >  30.0) return 1.0;
-    return 1.0 / (1.0 + std::exp(-1.0*x));
-  }
-
-  double nn_hard_sigmoid( double x ){
-    //github.com/Theano/Theano/blob/master/theano/tensor/nnet/sigm.py#L279
-    double out = 0.2*x + 0.5;
-    if (out < 0) return 0.0;
-    if (out > 1) return 1.0;
-    return out;
-  }
-
-  double nn_tanh( double x ){
-    return std::tanh(x);
-  }
-
-  double nn_relu( double x) {
-    if (std::isnan(x)) return x;
-    else return x > 0 ? x : 0;
-  }
-
-
-  // ________________________________________________________________________
-  // utility functions
-  MatrixXd build_matrix(const std::vector<double>& weights, size_t n_inputs)
-  {
-    size_t n_elements = weights.size();
-    if ((n_elements % n_inputs) != 0) {
-      std::string problem = "matrix elements not divisible by number"
-        " of columns. Elements: " + std::to_string(n_elements) +
-        ", Inputs: " + std::to_string(n_inputs);
-      throw lwt::NNConfigurationException(problem);
-    }
-    size_t n_outputs = n_elements / n_inputs;
-    MatrixXd matrix(n_outputs, n_inputs);
-    for (size_t row = 0; row < n_outputs; row++) {
-      for (size_t col = 0; col < n_inputs; col++) {
-        double element = weights.at(col + row * n_inputs);
-        matrix(row, col) = element;
-      }
-    }
-    return matrix;
-  }
-  VectorXd build_vector(const std::vector<double>& bias) {
-    VectorXd out(bias.size());
-    size_t idx = 0;
-    for (const auto& val: bias) {
-      out(idx) = val;
-      idx++;
-    }
-    return out;
-  }
-
-  // consistency checks
-  void throw_if_not_maxout(const LayerConfig& layer) {
-    bool wt_ok = layer.weights.size() == 0;
-    bool bias_ok = layer.bias.size() == 0;
-    bool maxout_ok = layer.sublayers.size() > 0;
-    bool act_ok = layer.activation == Activation::NONE;
-    if (wt_ok && bias_ok && maxout_ok && act_ok) return;
-    throw NNConfigurationException("layer has wrong info for maxout");
-  }
-  void throw_if_not_dense(const LayerConfig& layer) {
-    if (layer.sublayers.size() > 0) {
-      throw NNConfigurationException("sublayers in dense layer");
-    }
-  }
-
-  void throw_if_not_normalization(const LayerConfig& layer) {
-    if (layer.sublayers.size() > 0) {
-      throw NNConfigurationException("sublayers in normalization layer");
-    }
-  }
-
-  // component-wise getters (for Highway, lstm, etc)
-  DenseComponents get_component(const lwt::LayerConfig& layer, size_t n_in) {
-    using namespace Eigen;
-    using namespace lwt;
-    MatrixXd weights = build_matrix(layer.weights, n_in);
-    size_t n_out = weights.rows();
-    VectorXd bias = build_vector(layer.bias);
-
-    // the u element is optional
-    size_t u_el = layer.U.size();
-    MatrixXd U = u_el ? build_matrix(layer.U, n_out) : MatrixXd::Zero(0,0);
-
-    size_t u_out = U.rows();
-    size_t b_out = bias.rows();
-    bool u_mismatch = (u_out != n_out) && (u_out > 0);
-    if ( u_mismatch || b_out != n_out) {
-      throw NNConfigurationException(
-        "Output dims mismatch, W: " + std::to_string(n_out) +
-        ", U: " + std::to_string(u_out) + ", b: " + std::to_string(b_out));
-    }
-    return {weights, U, bias};
-  }
-
-
-  // ______________________________________________________________________
-  // Input preprocessors
-
-  // simple feed-forward version
-  InputPreprocessor::InputPreprocessor(const std::vector<Input>& inputs):
-    m_offsets(inputs.size()),
-    m_scales(inputs.size())
-  {
-    size_t in_num = 0;
-    for (const auto& input: inputs) {
-      m_offsets(in_num) = input.offset;
-      m_scales(in_num) = input.scale;
-      m_names.push_back(input.name);
-      in_num++;
-    }
-  }
-  VectorXd InputPreprocessor::operator()(const ValueMap& in) const {
-    VectorXd invec(m_names.size());
-    size_t input_number = 0;
-    for (const auto& in_name: m_names) {
-      if (!in.count(in_name)) {
-        throw NNEvaluationException("can't find input: " + in_name);
-      }
-      invec(input_number) = in.at(in_name);
-      input_number++;
-    }
-    return (invec + m_offsets).cwiseProduct(m_scales);
-  }
-
-
-  // Input vector preprocessor
-  InputVectorPreprocessor::InputVectorPreprocessor(
-    const std::vector<Input>& inputs):
-    m_offsets(inputs.size()),
-    m_scales(inputs.size())
-  {
-    size_t in_num = 0;
-    for (const auto& input: inputs) {
-      m_offsets(in_num) = input.offset;
-      m_scales(in_num) = input.scale;
-      m_names.push_back(input.name);
-      in_num++;
-    }
-    // require at least one input at configuration, since we require
-    // at least one for evaluation
-    if (in_num == 0) {
-      throw NNConfigurationException("need at least one input");
-    }
-  }
-  MatrixXd InputVectorPreprocessor::operator()(const VectorMap& in) const {
-    using namespace Eigen;
-    if (in.size() == 0) {
-      throw NNEvaluationException("Empty input map");
-    }
-    size_t n_cols = in.begin()->second.size();
-    MatrixXd inmat(m_names.size(), n_cols);
-    size_t in_num = 0;
-    for (const auto& in_name: m_names) {
-      if (!in.count(in_name)) {
-        throw NNEvaluationException("can't find input: " + in_name);
-      }
-      const auto& invec = in.at(in_name);
-      if (invec.size() == 0) {
-        throw NNEvaluationException("Input vector of zero length");
-      }
-      if (invec.size() != n_cols) {
-        throw NNEvaluationException("Input vector size mismatch");
-      }
-      inmat.row(in_num) = Map<const VectorXd>(invec.data(), invec.size());
-      in_num++;
-    }
-    return m_scales.asDiagonal() * (inmat.colwise() + m_offsets);
-  }
-
-
-  // ______________________________________________________________________
-  // exceptions
-  LightweightNNException::LightweightNNException(std::string problem):
-    std::logic_error(problem)
-  {}
-  NNConfigurationException::NNConfigurationException(std::string problem):
-    LightweightNNException(problem)
-  {}
-  NNEvaluationException::NNEvaluationException(std::string problem):
-    LightweightNNException(problem)
-  {}
-
-}
diff --git a/PhysicsAnalysis/JetTagging/JetTagTools/src/parse_json.cxx b/PhysicsAnalysis/JetTagging/JetTagTools/src/parse_json.cxx
deleted file mode 100644
index 38ca7c2f1d4d1eb90bfb8f4d63a4c63e101815b1..0000000000000000000000000000000000000000
--- a/PhysicsAnalysis/JetTagging/JetTagTools/src/parse_json.cxx
+++ /dev/null
@@ -1,203 +0,0 @@
-/*
-  Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
-*/
-
-// WARNING: this code was copied automatically from
-// https://github.com/lwtnn/lwtnn.git (rev v2.0)
-// Please don't edit it! To get the latest version, run
-// > ./update-lwtnn.sh
-// from JetTagTools/share
-
-#include "JetTagTools/parse_json.h"
-
-#include <boost/property_tree/ptree.hpp>
-#include <boost/property_tree/json_parser.hpp>
-#include <cassert>
-#include <string>
-
-#include <iostream>
-
-namespace {
-  using namespace boost::property_tree;
-  using namespace lwt;
-  lwt::Activation get_activation(const std::string&);
-  lwt::Architecture get_architecture(const std::string&);
-  void set_defaults(LayerConfig& lc);
-  void add_dense_info(LayerConfig& lc, const ptree::value_type& pt);
-  void add_maxout_info(LayerConfig& lc, const ptree::value_type& pt);
-  void add_component_info(LayerConfig& lc, const ptree::value_type& pt);
-  void add_embedding_info(LayerConfig& lc, const ptree::value_type& pt);
-}
-
-
-namespace lwt {
-
-  JSONConfig parse_json(std::istream& json)
-  {
-    boost::property_tree::ptree pt;
-    boost::property_tree::read_json(json, pt);
-
-    JSONConfig cfg;
-    for (const auto& v: pt.get_child("inputs")) {
-      std::string name = v.second.get<std::string>("name");
-      auto offset = v.second.get<double>("offset");
-      auto scale = v.second.get<double>("scale");
-      Input input{name, offset, scale};
-      cfg.inputs.push_back(input);
-    }
-    for (const auto& v: pt.get_child("layers")) {
-      LayerConfig layer;
-      set_defaults(layer);
-      Architecture arch = get_architecture(
-        v.second.get<std::string>("architecture"));
-
-      if (arch == Architecture::DENSE) {
-        add_dense_info(layer, v);
-      } else if (arch == Architecture::NORMALIZATION) {
-        add_dense_info(layer, v); // re-use dense layer
-      } else if (arch == Architecture::MAXOUT) {
-        add_maxout_info(layer, v);
-      } else if (arch == Architecture::LSTM ||
-                 arch == Architecture::GRU ||
-                 arch == Architecture::HIGHWAY) {
-        add_component_info(layer, v);
-      } else if (arch == Architecture::EMBEDDING) {
-        add_embedding_info(layer, v);
-      } else {
-        throw std::logic_error("architecture not implemented");
-      }
-      layer.architecture = arch;
-
-      cfg.layers.push_back(layer);
-    }
-    for (const auto& v: pt.get_child("outputs"))
-    {
-      assert(v.first.empty()); // array elements have no names
-      cfg.outputs.push_back(v.second.data());
-    }
-    const std::string dname = "defaults";
-    if (pt.count(dname)) {
-      for (const auto& def: pt.get_child(dname)) {
-        cfg.defaults.emplace(def.first, def.second.get_value<double>());
-      }
-    }
-    const std::string mname = "miscellaneous";
-    if (pt.count(mname)) {
-      for (const auto& misc: pt.get_child(mname)) {
-        cfg.miscellaneous.emplace(
-          misc.first, misc.second.get_value<std::string>());
-      }
-    }
-    return cfg;
-  }
-
-}
-
-namespace {
-
-  lwt::Activation get_activation(const std::string& str) {
-    using namespace lwt;
-    if (str == "linear") return Activation::LINEAR;
-    if (str == "sigmoid") return Activation::SIGMOID;
-    if (str == "rectified") return Activation::RECTIFIED;
-    if (str == "softmax") return Activation::SOFTMAX;
-    if (str == "tanh") return Activation::TANH;
-    if (str == "hard_sigmoid") return Activation::HARD_SIGMOID;
-    throw std::logic_error("activation function " + str + " not recognized");
-    return Activation::LINEAR;
-  }
-
-
-  lwt::Architecture get_architecture(const std::string& str) {
-    using namespace lwt;
-    if (str == "dense") return Architecture::DENSE;
-    if (str == "normalization") return Architecture::NORMALIZATION;
-    if (str == "highway") return Architecture::HIGHWAY;
-    if (str == "maxout") return Architecture::MAXOUT;
-    if (str == "lstm") return Architecture::LSTM;
-    if (str == "gru") return Architecture::GRU;
-    if (str == "embedding") return Architecture::EMBEDDING;
-    throw std::logic_error("architecture " + str + " not recognized");
-  }
-
-  void set_defaults(LayerConfig& layer) {
-    layer.activation = Activation::NONE;
-    layer.inner_activation = Activation::NONE;
-    layer.architecture = Architecture::NONE;
-  }
-
-  void add_dense_info(LayerConfig& layer, const ptree::value_type& v) {
-    for (const auto& wt: v.second.get_child("weights")) {
-      layer.weights.push_back(wt.second.get_value<double>());
-    }
-    for (const auto& bs: v.second.get_child("bias")) {
-      layer.bias.push_back(bs.second.get_value<double>());
-    }
-    // this last category is currently only used for LSTM
-    if (v.second.count("U") != 0) {
-      for (const auto& wt: v.second.get_child("U") ) {
-        layer.U.push_back(wt.second.get_value<double>());
-      }
-    }
-
-    if (v.second.count("activation") != 0) {
-      layer.activation = get_activation(
-        v.second.get<std::string>("activation"));
-    }
-
-  }
-
-  void add_maxout_info(LayerConfig& layer, const ptree::value_type& v) {
-    using namespace lwt;
-    for (const auto& sub: v.second.get_child("sublayers")) {
-      LayerConfig sublayer;
-      set_defaults(sublayer);
-      add_dense_info(sublayer, sub);
-      layer.sublayers.push_back(sublayer);
-    }
-  }
-
-
-  const std::map<std::string, lwt::Component> component_map {
-    {"i", Component::I},
-    {"o", Component::O},
-    {"c", Component::C},
-    {"f", Component::F},
-    {"z", Component::Z},
-    {"r", Component::R},
-    {"h", Component::H},
-    {"t", Component::T},
-    {"carry", Component::CARRY}
-  };
-
-  void add_component_info(LayerConfig& layer, const ptree::value_type& v) {
-    using namespace lwt;
-    for (const auto& comp: v.second.get_child("components")) {
-      LayerConfig cfg;
-      set_defaults(cfg);
-      add_dense_info(cfg, comp);
-      layer.components[component_map.at(comp.first)] = cfg;
-    }
-    layer.activation = get_activation(
-      v.second.get<std::string>("activation"));
-    if (v.second.count("inner_activation") != 0) {
-      layer.inner_activation = get_activation(
-        v.second.get<std::string>("inner_activation"));
-    }
-  }
-
-
-  void add_embedding_info(LayerConfig& layer, const ptree::value_type& v) {
-    using namespace lwt;
-    for (const auto& sub: v.second.get_child("sublayers")) {
-      EmbeddingConfig emb;
-      for (const auto& wt: sub.second.get_child("weights")) {
-        emb.weights.push_back(wt.second.get_value<double>());
-      }
-      emb.index = sub.second.get<int>("index");
-      emb.n_out = sub.second.get<int>("n_out");
-      layer.embedding.push_back(emb);
-    }
-  }
-
-}
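For reference, a minimal usage sketch of the external lwtnn interface that DL1Tag.cxx and RNNIPTag.cxx now include; it is the same API as the in-tree copy removed above (lwtnn rev v2.0). The JSON file name and the input variable names below are illustrative assumptions, not taken from the taggers.

#include "lwtnn/LightweightNeuralNetwork.hh"
#include "lwtnn/parse_json.hh"
#include "lwtnn/Exceptions.hh"

#include <fstream>
#include <iostream>

int main() {
  // Parse a network configuration exported in lwtnn's JSON format
  // ("network.json" is a placeholder path).
  std::ifstream config_stream("network.json");
  lwt::JSONConfig config = lwt::parse_json(config_stream);

  // Build the feed-forward network from the parsed inputs, layers and outputs.
  lwt::LightweightNeuralNetwork nn(config.inputs, config.layers, config.outputs);

  // Evaluate on a value map; the keys must match the names in config.inputs
  // ("pt" and "eta" are stand-ins here).
  lwt::ValueMap in{{"pt", 1.2}, {"eta", 0.3}};
  try {
    for (const auto& out : nn.compute(in)) {
      std::cout << out.first << ": " << out.second << "\n";
    }
  } catch (lwt::NNEvaluationException& err) {
    std::cerr << "evaluation failed: " << err.what() << "\n";
    return 1;
  }
  return 0;
}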