diff --git a/PhysicsAnalysis/JetTagging/JetTagTools/CMakeLists.txt b/PhysicsAnalysis/JetTagging/JetTagTools/CMakeLists.txt index 7d3c90c2232dd5a9e253e4a4512fa30ff7cea734..6ebe0c92bb000adc70c1d31c459b614394763979 100644 --- a/PhysicsAnalysis/JetTagging/JetTagTools/CMakeLists.txt +++ b/PhysicsAnalysis/JetTagging/JetTagTools/CMakeLists.txt @@ -55,6 +55,7 @@ atlas_depends_on_subdirs( find_package( Boost COMPONENTS filesystem thread system ) find_package( CLHEP ) find_package( Eigen ) +find_package( lwtnn ) find_package( ROOT COMPONENTS TMVA Core Tree MathCore Hist RIO pthread MathMore Minuit Minuit2 Matrix Physics HistPainter Rint RooFitCore RooFit ) @@ -86,11 +87,13 @@ atlas_add_library( src/parse_json.cxx src/Stack.cxx src/RNNIPTag.cxx PUBLIC_HEADERS JetTagTools - INCLUDE_DIRS ${ROOT_INCLUDE_DIRS} ${CLHEP_INCLUDE_DIRS} ${EIGEN_INCLUDE_DIRS} + INCLUDE_DIRS ${ROOT_INCLUDE_DIRS} ${CLHEP_INCLUDE_DIRS} + ${EIGEN_INCLUDE_DIRS} ${LWTNN_INCLUDE_DIRS} PRIVATE_INCLUDE_DIRS ${Boost_INCLUDE_DIRS} ${FASTJET_INCLUDE_DIRS} DEFINITIONS ${CLHEP_DEFINITIONS} LINK_LIBRARIES ${ROOT_LIBRARIES} ${CLHEP_LIBRARIES} ${EIGEN_LIBRARIES} + ${LWTNN_LIBRARIES} AsgTools AthenaBaseComps SGTools GeoPrimitives xAODBTagging xAODJet xAODTracking GaudiKernel JetTagInfo JetSubStructureUtils TrkParameters JetRecLib JetSubStructureMomentToolsLib egammaMVACalibLib MVAUtils diff --git a/PhysicsAnalysis/JetTagging/JetTagTools/JetTagTools/DL1Tag.h b/PhysicsAnalysis/JetTagging/JetTagTools/JetTagTools/DL1Tag.h index 20dccc1eae9851fb8e6e66a466b289c774bfd564..68fc3efe827b9d678751c0333c2d3ace48522daf 100644 --- a/PhysicsAnalysis/JetTagging/JetTagTools/JetTagTools/DL1Tag.h +++ b/PhysicsAnalysis/JetTagging/JetTagTools/JetTagTools/DL1Tag.h @@ -15,7 +15,7 @@ @authors Dan Guest, Luke de Oliveira, Marie Lanfermann ********************************************************/ #include "AthenaBaseComps/AthAlgTool.h" -#include "JetTagTools/NNLayerConfig.h" +#include "lwtnn/lightweight_network_config.hh" #include "JetTagTools/IMultivariateJetTagger.h" #include <vector> diff --git a/PhysicsAnalysis/JetTagging/JetTagTools/JetTagTools/LightweightNeuralNetwork.h b/PhysicsAnalysis/JetTagging/JetTagTools/JetTagTools/LightweightNeuralNetwork.h deleted file mode 100644 index 384dc35729ce75692772e63bdd433ab66117943b..0000000000000000000000000000000000000000 --- a/PhysicsAnalysis/JetTagging/JetTagTools/JetTagTools/LightweightNeuralNetwork.h +++ /dev/null @@ -1,97 +0,0 @@ -/* - Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration -*/ - -// WARNING: this code was copied automatically from -// https://github.com/lwtnn/lwtnn.git (rev v2.0) -// Please don't edit it! To get the latest version, run -// > ./update-lwtnn.sh -// from JetTagTools/share - -#ifndef LIGHTWEIGHT_NEURAL_NETWORK_HH -#define LIGHTWEIGHT_NEURAL_NETWORK_HH - -// Lightweight Neural Networks -// -// This is a simple NN implementation, designed to be lightweight in -// terms of both size and dependencies. For sanity we use Eigen, but -// otherwise this aims to be a minimal NN class which is fully -// configurable at runtime. -// -// The classes defined here are the high level wrappers: they don't -// directly include any Eigen code (to speed compliation of algorithms -// that use them), and they store data in STL objects. -// -// Authors: Dan Guest <dguest@cern.ch> -// Michael Kagan <mkagan@cern.ch> -// Michela Paganini <micky.91@hotmail.com> - -#include "NNLayerConfig.h" - -namespace lwt { - - class Stack; - class RecurrentStack; - class InputPreprocessor; - class InputVectorPreprocessor; - - // use a normal map externally, since these are more common in user - // code. - // TODO: is it worth changing to unordered_map? - typedef std::map<std::string, double> ValueMap; - typedef std::vector<std::pair<std::string, double> > ValueVector; - typedef std::map<std::string, std::vector<double> > VectorMap; - - // ______________________________________________________________________ - // high-level wrappers - - // feed-forward variant - class LightweightNeuralNetwork - { - public: - LightweightNeuralNetwork(const std::vector<Input>& inputs, - const std::vector<LayerConfig>& layers, - const std::vector<std::string>& outputs); - ~LightweightNeuralNetwork(); - // disable copying until we need it... - LightweightNeuralNetwork(LightweightNeuralNetwork&) = delete; - LightweightNeuralNetwork& operator=(LightweightNeuralNetwork&) = delete; - - // use a normal map externally, since these are more common in - // user code. - // TODO: is it worth changing to unordered_map? - ValueMap compute(const ValueMap&) const; - - private: - // use the Stack class above as the computational core - Stack* m_stack; - InputPreprocessor* m_preproc; - - // output labels - std::vector<std::string> m_outputs; - - }; - - // recurrent version - class LightweightRNN - { - public: - LightweightRNN(const std::vector<Input>& inputs, - const std::vector<LayerConfig>& layers, - const std::vector<std::string>& outputs); - ~LightweightRNN(); - LightweightRNN(LightweightRNN&) = delete; - LightweightRNN& operator=(LightweightRNN&) = delete; - - ValueMap reduce(const std::vector<ValueMap>&) const; - ValueMap reduce(const VectorMap&) const; - private: - RecurrentStack* m_stack; - InputPreprocessor* m_preproc; - InputVectorPreprocessor* m_vec_preproc; - std::vector<std::string> m_outputs; - size_t m_n_inputs; - }; - -} -#endif diff --git a/PhysicsAnalysis/JetTagging/JetTagTools/JetTagTools/NNLayerConfig.h b/PhysicsAnalysis/JetTagging/JetTagTools/JetTagTools/NNLayerConfig.h deleted file mode 100644 index 9cc599e7d81990ecbb1daaba6ffc3111b014ed31..0000000000000000000000000000000000000000 --- a/PhysicsAnalysis/JetTagging/JetTagTools/JetTagTools/NNLayerConfig.h +++ /dev/null @@ -1,71 +0,0 @@ -/* - Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration -*/ - -// WARNING: this code was copied automatically from -// https://github.com/lwtnn/lwtnn.git (rev v2.0) -// Please don't edit it! To get the latest version, run -// > ./update-lwtnn.sh -// from JetTagTools/share - -#ifndef NN_LAYER_CONFIG_HH -#define NN_LAYER_CONFIG_HH - -// Layer Configiruation for Lightweight Tagger -// -// The structures below are used to initalize -// `LightweightNeuralNetwork` and the simpler `Stack`. -// -// Author: Dan Guest <dguest@cern.ch> - -#include <vector> -#include <string> -#include <map> - -namespace lwt { - enum class Activation {NONE, LINEAR, SIGMOID, RECTIFIED, SOFTMAX, TANH, - HARD_SIGMOID}; - enum class Architecture {NONE, DENSE, NORMALIZATION, MAXOUT, HIGHWAY, - LSTM, GRU, EMBEDDING}; - // components (for LSTM, etc) - enum class Component { - I, O, C, F, // LSTM - Z, R, H, // GRU - T, CARRY}; // Highway - - // structure for embedding layers - struct EmbeddingConfig - { - std::vector<double> weights; - int index; - int n_out; - }; - - // main layer configuration - struct LayerConfig - { - // dense layer info - std::vector<double> weights; - std::vector<double> bias; - std::vector<double> U; // TODO: what is this thing called in LSTMs? - Activation activation; - Activation inner_activation; // for LSTMs and GRUs - - // additional info for sublayers - std::vector<LayerConfig> sublayers; - std::map<Component, LayerConfig> components; - std::vector<EmbeddingConfig> embedding; - - // arch flag - Architecture architecture; - }; - - struct Input - { - std::string name; - double offset; - double scale; - }; -} - -#endif diff --git a/PhysicsAnalysis/JetTagging/JetTagTools/JetTagTools/Stack.h b/PhysicsAnalysis/JetTagging/JetTagTools/JetTagTools/Stack.h deleted file mode 100644 index 0b1b56889e561edc43f18874d7a373367a056b02..0000000000000000000000000000000000000000 --- a/PhysicsAnalysis/JetTagging/JetTagTools/JetTagTools/Stack.h +++ /dev/null @@ -1,374 +0,0 @@ -/* - Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration -*/ - -// WARNING: this code was copied automatically from -// https://github.com/lwtnn/lwtnn.git (rev v2.0) -// Please don't edit it! To get the latest version, run -// > ./update-lwtnn.sh -// from JetTagTools/share - -#ifndef STACK_HH -#define STACK_HH - -// Stack classes -// -// These are the low-level classes that implement feed-forward and -// recurrent neural networks. All the Eigen-dependant code in this -// library should live in this file. -// -// To keep the Eigen code out of the high-level interface, the STL -> -// Eigen ``preprocessor'' classes are also defined here. -// -// The ordering of classes is as follows: -// - Feed-forward Stack class -// - Feed-forward Layer classes -// - RecurrentStack class -// - Recurrent layers -// - Activation functions -// - Various utility functions -// - Preprocessor classes - - -#include "Exceptions.h" -#include "NNLayerConfig.h" - -#include <Eigen/Dense> - -#include <vector> -#include <map> -#include <functional> - -namespace lwt { - - using Eigen::VectorXd; - using Eigen::MatrixXd; - - class ILayer; - class IRecurrentLayer; - - // use a normal map externally, since these are more common in user - // code. TODO: is it worth changing to unordered_map? - typedef std::map<std::string, double> ValueMap; - typedef std::vector<std::pair<std::string, double> > ValueVector; - typedef std::map<std::string, std::vector<double> > VectorMap; - - - // ______________________________________________________________________ - // Feed forward Stack class - - class Stack - { - public: - // constructor for dummy net - Stack(); - // constructor for real net - Stack(size_t n_inputs, const std::vector<LayerConfig>& layers, - size_t skip_layers = 0); - ~Stack(); - - // make non-copyable for now - Stack(Stack&) = delete; - Stack& operator=(Stack&) = delete; - - VectorXd compute(VectorXd) const; - size_t n_outputs() const; - - private: - // returns the size of the next layer - size_t add_layers(size_t n_inputs, const LayerConfig&); - size_t add_dense_layers(size_t n_inputs, const LayerConfig&); - size_t add_normalization_layers(size_t n_inputs, const LayerConfig&); - size_t add_highway_layers(size_t n_inputs, const LayerConfig&); - size_t add_maxout_layers(size_t n_inputs, const LayerConfig&); - std::vector<ILayer*> m_layers; - size_t m_n_outputs; - }; - - // _______________________________________________________________________ - // Feed-forward layers - - class ILayer - { - public: - virtual ~ILayer() {} - virtual VectorXd compute(const VectorXd&) const = 0; - }; - - class DummyLayer: public ILayer - { - public: - virtual VectorXd compute(const VectorXd&) const; - }; - - class UnaryActivationLayer: public ILayer - { - public: - UnaryActivationLayer(Activation); - virtual VectorXd compute(const VectorXd&) const; - private: - std::function<double(double)> m_func; - }; - - class SoftmaxLayer: public ILayer - { - public: - virtual VectorXd compute(const VectorXd&) const; - }; - - class BiasLayer: public ILayer - { - public: - BiasLayer(const VectorXd& bias); - BiasLayer(const std::vector<double>& bias); - virtual VectorXd compute(const VectorXd&) const; - private: - VectorXd m_bias; - }; - - class MatrixLayer: public ILayer - { - public: - MatrixLayer(const MatrixXd& matrix); - virtual VectorXd compute(const VectorXd&) const; - private: - MatrixXd m_matrix; - }; - - class MaxoutLayer: public ILayer - { - public: - typedef std::pair<MatrixXd, VectorXd> InitUnit; - MaxoutLayer(const std::vector<InitUnit>& maxout_tensor); - virtual VectorXd compute(const VectorXd&) const; - private: - std::vector<MatrixXd> m_matrices; - MatrixXd m_bias; - }; - - - /// Normalization layer /// - /// https://arxiv.org/abs/1502.03167 /// - class NormalizationLayer : public ILayer - { - - public: - NormalizationLayer(const VectorXd& W,const VectorXd& b); - virtual VectorXd compute(const VectorXd&) const; - - private: - VectorXd _W; - VectorXd _b; - - }; - - //http://arxiv.org/pdf/1505.00387v2.pdf - class HighwayLayer: public ILayer - { - public: - HighwayLayer(const MatrixXd& W, - const VectorXd& b, - const MatrixXd& W_carry, - const VectorXd& b_carry, - Activation activation); - virtual VectorXd compute(const VectorXd&) const; - private: - MatrixXd m_w_t; - VectorXd m_b_t; - MatrixXd m_w_c; - VectorXd m_b_c; - std::function<double(double)> m_act; - }; - - // ______________________________________________________________________ - // Recurrent Stack - - class RecurrentStack - { - public: - RecurrentStack(size_t n_inputs, const std::vector<LayerConfig>& layers); - ~RecurrentStack(); - RecurrentStack(RecurrentStack&) = delete; - RecurrentStack& operator=(RecurrentStack&) = delete; - VectorXd reduce(MatrixXd inputs) const; - size_t n_outputs() const; - private: - std::vector<IRecurrentLayer*> m_layers; - size_t add_lstm_layers(size_t n_inputs, const LayerConfig&); - size_t add_gru_layers(size_t n_inputs, const LayerConfig&); - size_t add_embedding_layers(size_t n_inputs, const LayerConfig&); - Stack* m_stack; - }; - - - // __________________________________________________________________ - // Recurrent layers - - class IRecurrentLayer - { - public: - virtual ~IRecurrentLayer() {} - virtual MatrixXd scan( const MatrixXd&) = 0; - }; - - class EmbeddingLayer : public IRecurrentLayer - { - public: - EmbeddingLayer(int var_row_index, MatrixXd W); - virtual ~EmbeddingLayer() {}; - virtual MatrixXd scan( const MatrixXd&); - - private: - int m_var_row_index; - MatrixXd m_W; - }; - - /// long short term memory /// - class LSTMLayer : public IRecurrentLayer - { - public: - LSTMLayer(Activation activation, Activation inner_activation, - MatrixXd W_i, MatrixXd U_i, VectorXd b_i, - MatrixXd W_f, MatrixXd U_f, VectorXd b_f, - MatrixXd W_o, MatrixXd U_o, VectorXd b_o, - MatrixXd W_c, MatrixXd U_c, VectorXd b_c, - bool return_sequences = true); - - virtual ~LSTMLayer() {}; - virtual VectorXd step( const VectorXd&); - virtual MatrixXd scan( const MatrixXd&); - - private: - std::function<double(double)> m_activation_fun; - std::function<double(double)> m_inner_activation_fun; - - MatrixXd m_W_i; - MatrixXd m_U_i; - VectorXd m_b_i; - - MatrixXd m_W_f; - MatrixXd m_U_f; - VectorXd m_b_f; - - MatrixXd m_W_o; - MatrixXd m_U_o; - VectorXd m_b_o; - - MatrixXd m_W_c; - MatrixXd m_U_c; - VectorXd m_b_c; - - //states - MatrixXd m_C_t; - MatrixXd m_h_t; - int m_time; - - int m_n_outputs; - - bool m_return_sequences; - }; - - /// gated recurrent unit /// - class GRULayer : public IRecurrentLayer - { - public: - GRULayer(Activation activation, Activation inner_activation, - MatrixXd W_z, MatrixXd U_z, VectorXd b_z, - MatrixXd W_r, MatrixXd U_r, VectorXd b_r, - MatrixXd W_h, MatrixXd U_h, VectorXd b_h, - bool return_sequences = true); - - virtual ~GRULayer() {}; - virtual VectorXd step( const VectorXd&); - virtual MatrixXd scan( const MatrixXd&); - - private: - std::function<double(double)> m_activation_fun; - std::function<double(double)> m_inner_activation_fun; - - MatrixXd m_W_z; - MatrixXd m_U_z; - VectorXd m_b_z; - - MatrixXd m_W_r; - MatrixXd m_U_r; - VectorXd m_b_r; - - MatrixXd m_W_h; - MatrixXd m_U_h; - VectorXd m_b_h; - - //states - MatrixXd m_h_t; - int m_time; - - int m_n_outputs; - - bool m_return_sequences; - }; - - // ______________________________________________________________________ - // Activation functions - - // note that others are supported but are too simple to - // require a special function - double nn_sigmoid( double x ); - double nn_hard_sigmoid( double x ); - double nn_tanh( double x ); - double nn_relu( double x ); - std::function<double(double)> get_activation(lwt::Activation); - - // WARNING: you own this pointer! Only call when assigning to member data! - ILayer* get_raw_activation_layer(Activation); - - // ______________________________________________________________________ - // utility functions - - // functions to build up basic units from vectors - MatrixXd build_matrix(const std::vector<double>& weights, size_t n_inputs); - VectorXd build_vector(const std::vector<double>& bias); - - // consistency checks - void throw_if_not_maxout(const LayerConfig& layer); - void throw_if_not_dense(const LayerConfig& layer); - void throw_if_not_normalization(const LayerConfig& layer); - - // LSTM component for convenience in some layers - struct DenseComponents - { - Eigen::MatrixXd W; - Eigen::MatrixXd U; - Eigen::VectorXd b; - }; - DenseComponents get_component(const lwt::LayerConfig& layer, size_t n_in); - - // ______________________________________________________________________ - // input preprocessor (handles normalization and packing into Eigen) - - class InputPreprocessor - { - public: - InputPreprocessor(const std::vector<Input>& inputs); - VectorXd operator()(const ValueMap&) const; - private: - // input transformations - VectorXd m_offsets; - VectorXd m_scales; - std::vector<std::string> m_names; - }; - - class InputVectorPreprocessor - { - public: - InputVectorPreprocessor(const std::vector<Input>& inputs); - MatrixXd operator()(const VectorMap&) const; - private: - // input transformations - VectorXd m_offsets; - VectorXd m_scales; - std::vector<std::string> m_names; - }; - -} - -#endif // STACK_HH diff --git a/PhysicsAnalysis/JetTagging/JetTagTools/JetTagTools/parse_json.h b/PhysicsAnalysis/JetTagging/JetTagTools/JetTagTools/parse_json.h deleted file mode 100644 index 6a6fde505ebccd286d3a8f215b2c0faca28ce661..0000000000000000000000000000000000000000 --- a/PhysicsAnalysis/JetTagging/JetTagTools/JetTagTools/parse_json.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration -*/ - -// WARNING: this code was copied automatically from -// https://github.com/lwtnn/lwtnn.git (rev v2.0) -// Please don't edit it! To get the latest version, run -// > ./update-lwtnn.sh -// from JetTagTools/share - -#ifndef PARSE_JSON_HH -#define PARSE_JSON_HH - -#include "NNLayerConfig.h" - -#include <istream> -#include <map> - -namespace lwt { - struct JSONConfig - { - std::vector<LayerConfig> layers; - std::vector<Input> inputs; - std::vector<std::string> outputs; - std::map<std::string, double> defaults; - std::map<std::string, std::string> miscellaneous; - }; - JSONConfig parse_json(std::istream& json); -} - - -#endif diff --git a/PhysicsAnalysis/JetTagging/JetTagTools/src/DL1Tag.cxx b/PhysicsAnalysis/JetTagging/JetTagTools/src/DL1Tag.cxx index b6e88c2092fa3b720db1a7a186a085a63f374f03..4a0b5ddda491ae28e74fc3a9b37bb6a426ecfb43 100644 --- a/PhysicsAnalysis/JetTagging/JetTagTools/src/DL1Tag.cxx +++ b/PhysicsAnalysis/JetTagging/JetTagTools/src/DL1Tag.cxx @@ -22,9 +22,10 @@ #include "JetTagCalibration/CalibrationBroker.h" #include "JetTagTools/DL1Tag.h" -#include "JetTagTools/LightweightNeuralNetwork.h" -#include "JetTagTools/parse_json.h" -#include "JetTagTools/Exceptions.h" + +#include "lwtnn/LightweightNeuralNetwork.hh" +#include "lwtnn/parse_json.hh" +#include "lwtnn/Exceptions.hh" #include "xAODBTagging/BTagging.h" #include "xAODJet/Jet.h" diff --git a/PhysicsAnalysis/JetTagging/JetTagTools/src/LightweightNeuralNetwork.cxx b/PhysicsAnalysis/JetTagging/JetTagTools/src/LightweightNeuralNetwork.cxx deleted file mode 100644 index 9fe83d0e318e366b33a8ea73f47de147944964cc..0000000000000000000000000000000000000000 --- a/PhysicsAnalysis/JetTagging/JetTagTools/src/LightweightNeuralNetwork.cxx +++ /dev/null @@ -1,119 +0,0 @@ -/* - Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration -*/ - -// WARNING: this code was copied automatically from -// https://github.com/lwtnn/lwtnn.git (rev v2.0) -// Please don't edit it! To get the latest version, run -// > ./update-lwtnn.sh -// from JetTagTools/share - -#include "JetTagTools/LightweightNeuralNetwork.h" -#include "JetTagTools/Stack.h" -#include <Eigen/Dense> - -#include <set> - -// internal utility functions -namespace { - using namespace Eigen; - using namespace lwt; -} -namespace lwt { - - // ______________________________________________________________________ - // LightweightNeuralNetwork HL wrapper - LightweightNeuralNetwork::LightweightNeuralNetwork( - const std::vector<Input>& inputs, - const std::vector<LayerConfig>& layers, - const std::vector<std::string>& outputs): - m_stack(new Stack(inputs.size(), layers)), - m_preproc(new InputPreprocessor(inputs)), - m_outputs(outputs.begin(), outputs.end()) - { - if (m_outputs.size() != m_stack->n_outputs()) { - std::string problem = "internal stack has " + - std::to_string(m_stack->n_outputs()) + " outputs, but " + - std::to_string(m_outputs.size()) + " were given"; - throw NNConfigurationException(problem); - } - } - - LightweightNeuralNetwork::~LightweightNeuralNetwork() { - delete m_stack; - m_stack = 0; - delete m_preproc; - m_preproc = 0; - } - - lwt::ValueMap - LightweightNeuralNetwork::compute(const lwt::ValueMap& in) const { - - // compute outputs - const auto& preproc = *m_preproc; - auto outvec = m_stack->compute(preproc(in)); - assert(outvec.rows() > 0); - auto out_size = static_cast<size_t>(outvec.rows()); - assert(out_size == m_outputs.size()); - - // build and return output map - lwt::ValueMap out_map; - for (size_t out_n = 0; out_n < out_size; out_n++) { - out_map.emplace(m_outputs.at(out_n), outvec(out_n)); - } - return out_map; - } - - // ______________________________________________________________________ - // LightweightRNN - - LightweightRNN::LightweightRNN(const std::vector<Input>& inputs, - const std::vector<LayerConfig>& layers, - const std::vector<std::string>& outputs): - m_stack(new RecurrentStack(inputs.size(), layers)), - m_preproc(new InputPreprocessor(inputs)), - m_vec_preproc(new InputVectorPreprocessor(inputs)), - m_outputs(outputs.begin(), outputs.end()), - m_n_inputs(inputs.size()) - { - if (m_outputs.size() != m_stack->n_outputs()) { - throw NNConfigurationException( - "Mismatch between NN output dimensions and output labels"); - } - } - LightweightRNN::~LightweightRNN() { - delete m_stack; - delete m_preproc; - delete m_vec_preproc; - } - - ValueMap LightweightRNN::reduce(const std::vector<ValueMap>& in) const { - const auto& preproc = *m_preproc; - MatrixXd inputs(m_n_inputs, in.size()); - for (size_t iii = 0; iii < in.size(); iii++) { - inputs.col(iii) = preproc(in.at(iii)); - } - auto outvec = m_stack->reduce(inputs); - ValueMap out; - const auto n_rows = static_cast<size_t>(outvec.rows()); - for (size_t iii = 0; iii < n_rows; iii++) { - out.emplace(m_outputs.at(iii), outvec(iii)); - } - return out; - } - - // this version should be slightly faster since it only has to sort - // the inputs once - ValueMap LightweightRNN::reduce(const VectorMap& in) const { - const auto& preproc = *m_vec_preproc; - auto outvec = m_stack->reduce(preproc(in)); - ValueMap out; - const auto n_rows = static_cast<size_t>(outvec.rows()); - for (size_t iii = 0; iii < n_rows; iii++) { - out.emplace(m_outputs.at(iii), outvec(iii)); - } - return out; - } - - -} diff --git a/PhysicsAnalysis/JetTagging/JetTagTools/src/RNNIPTag.cxx b/PhysicsAnalysis/JetTagging/JetTagTools/src/RNNIPTag.cxx index 200cc9e47ac046696c2f5991769335528bf9a362..5fa0862ff8e14cb4dfb0cb806303471339e750bb 100644 --- a/PhysicsAnalysis/JetTagging/JetTagTools/src/RNNIPTag.cxx +++ b/PhysicsAnalysis/JetTagging/JetTagTools/src/RNNIPTag.cxx @@ -3,9 +3,10 @@ */ #include "JetTagTools/RNNIPTag.h" -#include "JetTagTools/LightweightNeuralNetwork.h" -#include "JetTagTools/parse_json.h" -#include "JetTagTools/Exceptions.h" + +#include "lwtnn/LightweightNeuralNetwork.hh" +#include "lwtnn/Exceptions.hh" +#include "lwtnn/parse_json.hh" #include "JetTagTools/TrackSelector.h" #include "JetTagTools/GradedTrack.h" diff --git a/PhysicsAnalysis/JetTagging/JetTagTools/src/Stack.cxx b/PhysicsAnalysis/JetTagging/JetTagTools/src/Stack.cxx deleted file mode 100644 index 1875492b9d99dc84bf1cdd64ed80972399fc8461..0000000000000000000000000000000000000000 --- a/PhysicsAnalysis/JetTagging/JetTagTools/src/Stack.cxx +++ /dev/null @@ -1,756 +0,0 @@ -/* - Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration -*/ - -// WARNING: this code was copied automatically from -// https://github.com/lwtnn/lwtnn.git (rev v2.0) -// Please don't edit it! To get the latest version, run -// > ./update-lwtnn.sh -// from JetTagTools/share - -#include "JetTagTools/Stack.h" -#include <Eigen/Dense> - -#include <set> - -// internal utility functions -namespace { - using namespace Eigen; - using namespace lwt; -} -namespace lwt { - - // ______________________________________________________________________ - // Feed forward Stack class - - // dummy construction routine - Stack::Stack() { - m_layers.push_back(new DummyLayer); - m_layers.push_back(new UnaryActivationLayer(Activation::SIGMOID)); - m_layers.push_back(new BiasLayer(std::vector<double>{1, 1, 1, 1})); - MatrixXd mat(4, 4); - mat << - 0, 0, 0, 1, - 0, 0, 1, 0, - 0, 1, 0, 0, - 1, 0, 0, 0; - m_layers.push_back(new MatrixLayer(mat)); - m_n_outputs = 4; - } - - // construct from LayerConfig - Stack::Stack(size_t n_inputs, const std::vector<LayerConfig>& layers, - size_t skip) { - for (size_t nnn = skip; nnn < layers.size(); nnn++) { - n_inputs = add_layers(n_inputs, layers.at(nnn)); - } - // the final assigned n_inputs is the number of output nodes - m_n_outputs = n_inputs; - } - - Stack::~Stack() { - for (auto& layer: m_layers) { - delete layer; - layer = 0; - } - } - VectorXd Stack::compute(VectorXd in) const { - for (const auto& layer: m_layers) { - in = layer->compute(in); - } - return in; - } - size_t Stack::n_outputs() const { - return m_n_outputs; - } - - - // Private Stack methods to add various types of layers - // - // top level add_layers method. This delegates to the other methods - // below - size_t Stack::add_layers(size_t n_inputs, const LayerConfig& layer) { - if (layer.architecture == Architecture::DENSE) { - return add_dense_layers(n_inputs, layer); - } else if (layer.architecture == Architecture::NORMALIZATION){ - return add_normalization_layers(n_inputs, layer); - } else if (layer.architecture == Architecture::HIGHWAY){ - return add_highway_layers(n_inputs, layer); - } else if (layer.architecture == Architecture::MAXOUT) { - return add_maxout_layers(n_inputs, layer); - } - throw NNConfigurationException("unknown architecture"); - } - - size_t Stack::add_dense_layers(size_t n_inputs, const LayerConfig& layer) { - assert(layer.architecture == Architecture::DENSE); - throw_if_not_dense(layer); - - size_t n_outputs = n_inputs; - - // add matrix layer - if (layer.weights.size() > 0) { - MatrixXd matrix = build_matrix(layer.weights, n_inputs); - n_outputs = matrix.rows(); - m_layers.push_back(new MatrixLayer(matrix)); - }; - - // add bias layer - if (layer.bias.size() > 0) { - if (n_outputs != layer.bias.size() ) { - std::string problem = "tried to add a bias layer with " + - std::to_string(layer.bias.size()) + " entries, previous layer" - " had " + std::to_string(n_outputs) + " outputs"; - throw NNConfigurationException(problem); - } - m_layers.push_back(new BiasLayer(layer.bias)); - } - - // add activation layer - if (layer.activation != Activation::LINEAR) { - m_layers.push_back(get_raw_activation_layer(layer.activation)); - } - - return n_outputs; - } - - size_t Stack::add_normalization_layers(size_t n_inputs, const LayerConfig& layer) { - assert(layer.architecture == Architecture::NORMALIZATION); - throw_if_not_normalization(layer); - - // Do some checks - if ( layer.weights.size() < 1 || layer.bias.size() < 1 ) { - std::string problem = "Either weights or bias layer size is < 1"; - throw NNConfigurationException(problem); - }; - if ( layer.weights.size() != layer.bias.size() ) { - std::string problem = "weights and bias layer are not equal in size!"; - throw NNConfigurationException(problem); - }; - VectorXd v_weights = build_vector(layer.weights); - VectorXd v_bias = build_vector(layer.bias); - - m_layers.push_back( - new NormalizationLayer(v_weights, v_bias)); - return n_inputs; - } - - - size_t Stack::add_highway_layers(size_t n_inputs, const LayerConfig& layer) { - auto& comps = layer.components; - const auto& t = get_component(comps.at(Component::T), n_inputs); - const auto& c = get_component(comps.at(Component::CARRY), n_inputs); - - m_layers.push_back( - new HighwayLayer(t.W, t.b, c.W, c.b, layer.activation)); - return n_inputs; - } - - - size_t Stack::add_maxout_layers(size_t n_inputs, const LayerConfig& layer) { - assert(layer.architecture == Architecture::MAXOUT); - throw_if_not_maxout(layer); - std::vector<MaxoutLayer::InitUnit> matrices; - std::set<size_t> n_outputs; - for (const auto& sublayer: layer.sublayers) { - MatrixXd matrix = build_matrix(sublayer.weights, n_inputs); - VectorXd bias = build_vector(sublayer.bias); - n_outputs.insert(matrix.rows()); - matrices.push_back(std::make_pair(matrix, bias)); - } - if (n_outputs.size() == 0) { - throw NNConfigurationException("tried to build maxout withoutweights!"); - } - else if (n_outputs.size() != 1) { - throw NNConfigurationException("uneven matrices for maxout"); - } - m_layers.push_back(new MaxoutLayer(matrices)); - return *n_outputs.begin(); - } - - - // _______________________________________________________________________ - // Feed-forward layers - - VectorXd DummyLayer::compute(const VectorXd& in) const { - return in; - } - - // activation functions - UnaryActivationLayer::UnaryActivationLayer(Activation act): - m_func(get_activation(act)) - { - } - VectorXd UnaryActivationLayer::compute(const VectorXd& in) const { - return in.unaryExpr(m_func); - } - - VectorXd SoftmaxLayer::compute(const VectorXd& in) const { - size_t n_elements = in.rows(); - VectorXd exp(n_elements); - for (size_t iii = 0; iii < n_elements; iii++) { - exp(iii) = std::exp(in(iii)); - } - double sum_exp = exp.sum(); - return exp / sum_exp; - } - - // bias layer - BiasLayer::BiasLayer(const VectorXd& bias): m_bias(bias) - { - } - BiasLayer::BiasLayer(const std::vector<double>& bias): - m_bias(build_vector(bias)) - { - } - VectorXd BiasLayer::compute(const VectorXd& in) const { - return in + m_bias; - } - - // basic dense matrix layer - MatrixLayer::MatrixLayer(const MatrixXd& matrix): - m_matrix(matrix) - { - } - VectorXd MatrixLayer::compute(const VectorXd& in) const { - return m_matrix * in; - } - - // maxout layer - MaxoutLayer::MaxoutLayer(const std::vector<MaxoutLayer::InitUnit>& units): - m_bias(units.size(), units.front().first.rows()) - { - int out_pos = 0; - for (const auto& unit: units) { - m_matrices.push_back(unit.first); - m_bias.row(out_pos) = unit.second; - out_pos++; - } - } - VectorXd MaxoutLayer::compute(const VectorXd& in) const { - // eigen supports tensors, but only in the experimental component - // for now just stick to matrix and vector classes - const size_t n_mat = m_matrices.size(); - const size_t out_dim = m_matrices.front().rows(); - MatrixXd outputs(n_mat, out_dim); - for (size_t mat_n = 0; mat_n < n_mat; mat_n++) { - outputs.row(mat_n) = m_matrices.at(mat_n) * in; - } - outputs += m_bias; - return outputs.colwise().maxCoeff(); - } - - // Normalization layer - NormalizationLayer::NormalizationLayer(const VectorXd& W, - const VectorXd& b): - _W(W), _b(b) - { - } - VectorXd NormalizationLayer::compute(const VectorXd& in) const { - VectorXd shift = in + _b ; - return _W.cwiseProduct(shift); - } - - // highway layer - HighwayLayer::HighwayLayer(const MatrixXd& W, - const VectorXd& b, - const MatrixXd& W_carry, - const VectorXd& b_carry, - Activation activation): - m_w_t(W), m_b_t(b), m_w_c(W_carry), m_b_c(b_carry), - m_act(get_activation(activation)) - { - } - VectorXd HighwayLayer::compute(const VectorXd& in) const { - const std::function<double(double)> sig(nn_sigmoid); - ArrayXd c = (m_w_c * in + m_b_c).unaryExpr(sig); - ArrayXd t = (m_w_t * in + m_b_t).unaryExpr(m_act); - return c * t + (1 - c) * in.array(); - } - - // ______________________________________________________________________ - // Recurrent Stack - - RecurrentStack::RecurrentStack(size_t n_inputs, - const std::vector<lwt::LayerConfig>& layers) - { - using namespace lwt; - size_t layer_n = 0; - const size_t n_layers = layers.size(); - for (;layer_n < n_layers; layer_n++) { - auto& layer = layers.at(layer_n); - - // add recurrent layers (now LSTM and GRU!) - if (layer.architecture == Architecture::LSTM) { - n_inputs = add_lstm_layers(n_inputs, layer); - } else if (layer.architecture == Architecture::GRU) { - n_inputs = add_gru_layers(n_inputs, layer); - } else if (layer.architecture == Architecture::EMBEDDING) { - n_inputs = add_embedding_layers(n_inputs, layer); - } else { - // leave this loop if we're done with the recurrent stuff - break; - } - } - // fill the remaining dense layers - m_stack = new Stack(n_inputs, layers, layer_n); - } - RecurrentStack::~RecurrentStack() { - for (auto& layer: m_layers) { - delete layer; - layer = 0; - } - delete m_stack; - m_stack = 0; - } - VectorXd RecurrentStack::reduce(MatrixXd in) const { - for (auto* layer: m_layers) { - in = layer->scan(in); - } - return m_stack->compute(in.col(in.cols() - 1)); - } - size_t RecurrentStack::n_outputs() const { - return m_stack->n_outputs(); - } - - size_t RecurrentStack::add_lstm_layers(size_t n_inputs, - const LayerConfig& layer) { - auto& comps = layer.components; - const auto& i = get_component(comps.at(Component::I), n_inputs); - const auto& o = get_component(comps.at(Component::O), n_inputs); - const auto& f = get_component(comps.at(Component::F), n_inputs); - const auto& c = get_component(comps.at(Component::C), n_inputs); - m_layers.push_back( - new LSTMLayer(layer.activation, layer.inner_activation, - i.W, i.U, i.b, - f.W, f.U, f.b, - o.W, o.U, o.b, - c.W, c.U, c.b)); - return o.b.rows(); - } - - size_t RecurrentStack::add_gru_layers(size_t n_inputs, - const LayerConfig& layer) { - auto& comps = layer.components; - const auto& z = get_component(comps.at(Component::Z), n_inputs); - const auto& r = get_component(comps.at(Component::R), n_inputs); - const auto& h = get_component(comps.at(Component::H), n_inputs); - m_layers.push_back( - new GRULayer(layer.activation, layer.inner_activation, - z.W, z.U, z.b, - r.W, r.U, r.b, - h.W, h.U, h.b)); - return h.b.rows(); - } - - size_t RecurrentStack::add_embedding_layers(size_t n_inputs, - const LayerConfig& layer) { - for (const auto& emb: layer.embedding) { - size_t n_wt = emb.weights.size(); - size_t n_cats = n_wt / emb.n_out; - MatrixXd mat = build_matrix(emb.weights, n_cats); - m_layers.push_back(new EmbeddingLayer(emb.index, mat)); - n_inputs += emb.n_out - 1; - } - return n_inputs; - } - - // __________________________________________________________________ - // Recurrent layers - - EmbeddingLayer::EmbeddingLayer(int var_row_index, MatrixXd W): - m_var_row_index(var_row_index), - m_W(W) - { - if(var_row_index < 0) - throw NNConfigurationException( - "EmbeddingLayer::EmbeddingLayer - can not set var_row_index<0," - " it is an index for a matrix row!"); - } - - MatrixXd EmbeddingLayer::scan( const MatrixXd& x) { - - if( m_var_row_index >= x.rows() ) - throw NNEvaluationException( - "EmbeddingLayer::scan - var_row_index is larger than input matrix" - " number of rows!"); - - MatrixXd embedded(m_W.rows(), x.cols()); - - for(int icol=0; icol<x.cols(); icol++) { - double vector_idx = x(m_var_row_index, icol); - bool is_int = std::floor(vector_idx) == vector_idx; - bool is_valid = (vector_idx >= 0) && (vector_idx < m_W.cols()); - if (!is_int || !is_valid) throw NNEvaluationException( - "Invalid embedded index: " + std::to_string(vector_idx)); - embedded.col(icol) = m_W.col( vector_idx ); - } - - //only embed 1 variable at a time, so this should be correct size - MatrixXd out(m_W.rows() + (x.rows() - 1), x.cols()); - - //assuming m_var_row_index is an index with first possible value of 0 - if(m_var_row_index > 0) - out.topRows(m_var_row_index) = x.topRows(m_var_row_index); - - out.block(m_var_row_index, 0, embedded.rows(), embedded.cols()) = embedded; - - if( m_var_row_index < (x.rows()-1) ) - out.bottomRows( x.cols() - 1 - m_var_row_index) - = x.bottomRows( x.cols() - 1 - m_var_row_index); - - return out; - } - - - // LSTM layer - LSTMLayer::LSTMLayer(Activation activation, Activation inner_activation, - MatrixXd W_i, MatrixXd U_i, VectorXd b_i, - MatrixXd W_f, MatrixXd U_f, VectorXd b_f, - MatrixXd W_o, MatrixXd U_o, VectorXd b_o, - MatrixXd W_c, MatrixXd U_c, VectorXd b_c, - bool return_sequences): - m_W_i(W_i), - m_U_i(U_i), - m_b_i(b_i), - m_W_f(W_f), - m_U_f(U_f), - m_b_f(b_f), - m_W_o(W_o), - m_U_o(U_o), - m_b_o(b_o), - m_W_c(W_c), - m_U_c(U_c), - m_b_c(b_c), - m_time(-1), - m_return_sequences(return_sequences) - { - m_n_outputs = m_W_o.rows(); - - m_activation_fun = get_activation(activation); - m_inner_activation_fun = get_activation(inner_activation); - } - - VectorXd LSTMLayer::step( const VectorXd& x_t ) { - // https://github.com/fchollet/keras/blob/master/keras/layers/recurrent.py#L740 - - if(m_time < 0) - throw NNEvaluationException( - "LSTMLayer::compute - time is less than zero!"); - - const auto& act_fun = m_activation_fun; - const auto& in_act_fun = m_inner_activation_fun; - - int tm1 = std::max(0, m_time - 1); - VectorXd h_tm1 = m_h_t.col(tm1); - VectorXd C_tm1 = m_C_t.col(tm1); - - VectorXd i = (m_W_i*x_t + m_b_i + m_U_i*h_tm1).unaryExpr(in_act_fun); - VectorXd f = (m_W_f*x_t + m_b_f + m_U_f*h_tm1).unaryExpr(in_act_fun); - VectorXd o = (m_W_o*x_t + m_b_o + m_U_o*h_tm1).unaryExpr(in_act_fun); - VectorXd ct = (m_W_c*x_t + m_b_c + m_U_c*h_tm1).unaryExpr(act_fun); - - m_C_t.col(m_time) = f.cwiseProduct(C_tm1) + i.cwiseProduct(ct); - m_h_t.col(m_time) = o.cwiseProduct( m_C_t.col(m_time).unaryExpr(act_fun) ); - - return VectorXd( m_h_t.col(m_time) ); - } - - MatrixXd LSTMLayer::scan( const MatrixXd& x ){ - - m_C_t.resize(m_n_outputs, x.cols()); - m_C_t.setZero(); - m_h_t.resize(m_n_outputs, x.cols()); - m_h_t.setZero(); - m_time = -1; - - - for(m_time=0; m_time < x.cols(); m_time++) { - this->step( x.col( m_time ) ); - } - - return m_return_sequences ? m_h_t : m_h_t.col(m_h_t.cols() - 1); - } - - - // GRU layer - GRULayer::GRULayer(Activation activation, Activation inner_activation, - MatrixXd W_z, MatrixXd U_z, VectorXd b_z, - MatrixXd W_r, MatrixXd U_r, VectorXd b_r, - MatrixXd W_h, MatrixXd U_h, VectorXd b_h, - bool return_sequences): - m_W_z(W_z), - m_U_z(U_z), - m_b_z(b_z), - m_W_r(W_r), - m_U_r(U_r), - m_b_r(b_r), - m_W_h(W_h), - m_U_h(U_h), - m_b_h(b_h), - m_time(-1), - m_return_sequences(return_sequences) - { - m_n_outputs = m_W_h.rows(); - - m_activation_fun = get_activation(activation); - m_inner_activation_fun = get_activation(inner_activation); - } - - VectorXd GRULayer::step( const VectorXd& x_t ) { - // https://github.com/fchollet/keras/blob/master/keras/layers/recurrent.py#L547 - - if(m_time < 0) - throw NNEvaluationException( - "LSTMLayer::compute - time is less than zero!"); - - const auto& act_fun = m_activation_fun; - const auto& in_act_fun = m_inner_activation_fun; - - int tm1 = std::max(0, m_time - 1); - VectorXd h_tm1 = m_h_t.col(tm1); - //VectorXd C_tm1 = m_C_t.col(tm1); - VectorXd z = (m_W_z*x_t + m_b_z + m_U_z*h_tm1).unaryExpr(in_act_fun); - VectorXd r = (m_W_r*x_t + m_b_r + m_U_r*h_tm1).unaryExpr(in_act_fun); - VectorXd hh = (m_W_h*x_t + m_b_h + m_U_h*(r.cwiseProduct(h_tm1))).unaryExpr(act_fun); - m_h_t.col(m_time) = z.cwiseProduct(h_tm1) + (VectorXd::Ones(z.size()) - z).cwiseProduct(hh); - - return VectorXd( m_h_t.col(m_time) ); - } - - MatrixXd GRULayer::scan( const MatrixXd& x ){ - - m_h_t.resize(m_n_outputs, x.cols()); - m_h_t.setZero(); - m_time = -1; - - for(m_time=0; m_time < x.cols(); m_time++){ - this->step( x.col( m_time ) ); - } - - return m_return_sequences ? m_h_t : m_h_t.col(m_h_t.cols() - 1); - } - - // _____________________________________________________________________ - // Activation functions - // - // There are two functions below. In most cases the activation layer - // can be implemented as a unary function, but in some cases - // (i.e. softmax) something more complicated is reqired. - - // Note that in the first case you own this layer! It's your - // responsibility to delete it. - ILayer* get_raw_activation_layer(Activation activation) { - // Check for special cases. If it's not one, use - // UnaryActivationLayer - switch (activation) { - case Activation::SOFTMAX: return new SoftmaxLayer; - default: return new UnaryActivationLayer(activation); - } - } - - // Most activation functions should be handled here. - std::function<double(double)> get_activation(lwt::Activation act) { - using namespace lwt; - switch (act) { - case Activation::SIGMOID: return nn_sigmoid; - case Activation::HARD_SIGMOID: return nn_hard_sigmoid; - case Activation::TANH: return nn_tanh; - case Activation::RECTIFIED: return nn_relu; - case Activation::LINEAR: return [](double x){return x;}; - default: { - throw NNConfigurationException("Got undefined activation function"); - } - } - } - - - double nn_sigmoid( double x ){ - //github.com/Theano/Theano/blob/master/theano/tensor/nnet/sigm.py#L35 - if (x < -30.0) return 0.0; - if (x > 30.0) return 1.0; - return 1.0 / (1.0 + std::exp(-1.0*x)); - } - - double nn_hard_sigmoid( double x ){ - //github.com/Theano/Theano/blob/master/theano/tensor/nnet/sigm.py#L279 - double out = 0.2*x + 0.5; - if (out < 0) return 0.0; - if (out > 1) return 1.0; - return out; - } - - double nn_tanh( double x ){ - return std::tanh(x); - } - - double nn_relu( double x) { - if (std::isnan(x)) return x; - else return x > 0 ? x : 0; - } - - - // ________________________________________________________________________ - // utility functions - MatrixXd build_matrix(const std::vector<double>& weights, size_t n_inputs) - { - size_t n_elements = weights.size(); - if ((n_elements % n_inputs) != 0) { - std::string problem = "matrix elements not divisible by number" - " of columns. Elements: " + std::to_string(n_elements) + - ", Inputs: " + std::to_string(n_inputs); - throw lwt::NNConfigurationException(problem); - } - size_t n_outputs = n_elements / n_inputs; - MatrixXd matrix(n_outputs, n_inputs); - for (size_t row = 0; row < n_outputs; row++) { - for (size_t col = 0; col < n_inputs; col++) { - double element = weights.at(col + row * n_inputs); - matrix(row, col) = element; - } - } - return matrix; - } - VectorXd build_vector(const std::vector<double>& bias) { - VectorXd out(bias.size()); - size_t idx = 0; - for (const auto& val: bias) { - out(idx) = val; - idx++; - } - return out; - } - - // consistency checks - void throw_if_not_maxout(const LayerConfig& layer) { - bool wt_ok = layer.weights.size() == 0; - bool bias_ok = layer.bias.size() == 0; - bool maxout_ok = layer.sublayers.size() > 0; - bool act_ok = layer.activation == Activation::NONE; - if (wt_ok && bias_ok && maxout_ok && act_ok) return; - throw NNConfigurationException("layer has wrong info for maxout"); - } - void throw_if_not_dense(const LayerConfig& layer) { - if (layer.sublayers.size() > 0) { - throw NNConfigurationException("sublayers in dense layer"); - } - } - - void throw_if_not_normalization(const LayerConfig& layer) { - if (layer.sublayers.size() > 0) { - throw NNConfigurationException("sublayers in normalization layer"); - } - } - - // component-wise getters (for Highway, lstm, etc) - DenseComponents get_component(const lwt::LayerConfig& layer, size_t n_in) { - using namespace Eigen; - using namespace lwt; - MatrixXd weights = build_matrix(layer.weights, n_in); - size_t n_out = weights.rows(); - VectorXd bias = build_vector(layer.bias); - - // the u element is optional - size_t u_el = layer.U.size(); - MatrixXd U = u_el ? build_matrix(layer.U, n_out) : MatrixXd::Zero(0,0); - - size_t u_out = U.rows(); - size_t b_out = bias.rows(); - bool u_mismatch = (u_out != n_out) && (u_out > 0); - if ( u_mismatch || b_out != n_out) { - throw NNConfigurationException( - "Output dims mismatch, W: " + std::to_string(n_out) + - ", U: " + std::to_string(u_out) + ", b: " + std::to_string(b_out)); - } - return {weights, U, bias}; - } - - - // ______________________________________________________________________ - // Input preprocessors - - // simple feed-forwared version - InputPreprocessor::InputPreprocessor(const std::vector<Input>& inputs): - m_offsets(inputs.size()), - m_scales(inputs.size()) - { - size_t in_num = 0; - for (const auto& input: inputs) { - m_offsets(in_num) = input.offset; - m_scales(in_num) = input.scale; - m_names.push_back(input.name); - in_num++; - } - } - VectorXd InputPreprocessor::operator()(const ValueMap& in) const { - VectorXd invec(m_names.size()); - size_t input_number = 0; - for (const auto& in_name: m_names) { - if (!in.count(in_name)) { - throw NNEvaluationException("can't find input: " + in_name); - } - invec(input_number) = in.at(in_name); - input_number++; - } - return (invec + m_offsets).cwiseProduct(m_scales); - } - - - // Input vector preprocessor - InputVectorPreprocessor::InputVectorPreprocessor( - const std::vector<Input>& inputs): - m_offsets(inputs.size()), - m_scales(inputs.size()) - { - size_t in_num = 0; - for (const auto& input: inputs) { - m_offsets(in_num) = input.offset; - m_scales(in_num) = input.scale; - m_names.push_back(input.name); - in_num++; - } - // require at least one input at configuration, since we require - // at least one for evaluation - if (in_num == 0) { - throw NNConfigurationException("need at least one input"); - } - } - MatrixXd InputVectorPreprocessor::operator()(const VectorMap& in) const { - using namespace Eigen; - if (in.size() == 0) { - throw NNEvaluationException("Empty input map"); - } - size_t n_cols = in.begin()->second.size(); - MatrixXd inmat(m_names.size(), n_cols); - size_t in_num = 0; - for (const auto& in_name: m_names) { - if (!in.count(in_name)) { - throw NNEvaluationException("can't find input: " + in_name); - } - const auto& invec = in.at(in_name); - if (invec.size() == 0) { - throw NNEvaluationException("Input vector of zero length"); - } - if (invec.size() != n_cols) { - throw NNEvaluationException("Input vector size mismatch"); - } - inmat.row(in_num) = Map<const VectorXd>(invec.data(), invec.size()); - in_num++; - } - return m_scales.asDiagonal() * (inmat.colwise() + m_offsets); - } - - - // ______________________________________________________________________ - // excpetions - LightweightNNException::LightweightNNException(std::string problem): - std::logic_error(problem) - {} - NNConfigurationException::NNConfigurationException(std::string problem): - LightweightNNException(problem) - {} - NNEvaluationException::NNEvaluationException(std::string problem): - LightweightNNException(problem) - {} - -} diff --git a/PhysicsAnalysis/JetTagging/JetTagTools/src/parse_json.cxx b/PhysicsAnalysis/JetTagging/JetTagTools/src/parse_json.cxx deleted file mode 100644 index 38ca7c2f1d4d1eb90bfb8f4d63a4c63e101815b1..0000000000000000000000000000000000000000 --- a/PhysicsAnalysis/JetTagging/JetTagTools/src/parse_json.cxx +++ /dev/null @@ -1,203 +0,0 @@ -/* - Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration -*/ - -// WARNING: this code was copied automatically from -// https://github.com/lwtnn/lwtnn.git (rev v2.0) -// Please don't edit it! To get the latest version, run -// > ./update-lwtnn.sh -// from JetTagTools/share - -#include "JetTagTools/parse_json.h" - -#include <boost/property_tree/ptree.hpp> -#include <boost/property_tree/json_parser.hpp> -#include <cassert> -#include <string> - -#include <iostream> - -namespace { - using namespace boost::property_tree; - using namespace lwt; - lwt::Activation get_activation(const std::string&); - lwt::Architecture get_architecture(const std::string&); - void set_defaults(LayerConfig& lc); - void add_dense_info(LayerConfig& lc, const ptree::value_type& pt); - void add_maxout_info(LayerConfig& lc, const ptree::value_type& pt); - void add_component_info(LayerConfig& lc, const ptree::value_type& pt); - void add_embedding_info(LayerConfig& lc, const ptree::value_type& pt); -} - - -namespace lwt { - - JSONConfig parse_json(std::istream& json) - { - boost::property_tree::ptree pt; - boost::property_tree::read_json(json, pt); - - JSONConfig cfg; - for (const auto& v: pt.get_child("inputs")) { - std::string name = v.second.get<std::string>("name"); - auto offset = v.second.get<double>("offset"); - auto scale = v.second.get<double>("scale"); - Input input{name, offset, scale}; - cfg.inputs.push_back(input); - } - for (const auto& v: pt.get_child("layers")) { - LayerConfig layer; - set_defaults(layer); - Architecture arch = get_architecture( - v.second.get<std::string>("architecture")); - - if (arch == Architecture::DENSE) { - add_dense_info(layer, v); - } else if (arch == Architecture::NORMALIZATION) { - add_dense_info(layer, v); // re-use dense layer - } else if (arch == Architecture::MAXOUT) { - add_maxout_info(layer, v); - } else if (arch == Architecture::LSTM || - arch == Architecture::GRU || - arch == Architecture::HIGHWAY) { - add_component_info(layer, v); - } else if (arch == Architecture::EMBEDDING) { - add_embedding_info(layer, v); - } else { - throw std::logic_error("architecture not implemented"); - } - layer.architecture = arch; - - cfg.layers.push_back(layer); - } - for (const auto& v: pt.get_child("outputs")) - { - assert(v.first.empty()); // array elements have no names - cfg.outputs.push_back(v.second.data()); - } - const std::string dname = "defaults"; - if (pt.count(dname)) { - for (const auto& def: pt.get_child(dname)) { - cfg.defaults.emplace(def.first, def.second.get_value<double>()); - } - } - const std::string mname = "miscellaneous"; - if (pt.count(mname)) { - for (const auto& misc: pt.get_child(mname)) { - cfg.miscellaneous.emplace( - misc.first, misc.second.get_value<std::string>()); - } - } - return cfg; - } - -} - -namespace { - - lwt::Activation get_activation(const std::string& str) { - using namespace lwt; - if (str == "linear") return Activation::LINEAR; - if (str == "sigmoid") return Activation::SIGMOID; - if (str == "rectified") return Activation::RECTIFIED; - if (str == "softmax") return Activation::SOFTMAX; - if (str == "tanh") return Activation::TANH; - if (str == "hard_sigmoid") return Activation::HARD_SIGMOID; - throw std::logic_error("activation function " + str + " not recognized"); - return Activation::LINEAR; - } - - - lwt::Architecture get_architecture(const std::string& str) { - using namespace lwt; - if (str == "dense") return Architecture::DENSE; - if (str == "normalization") return Architecture::NORMALIZATION; - if (str == "highway") return Architecture::HIGHWAY; - if (str == "maxout") return Architecture::MAXOUT; - if (str == "lstm") return Architecture::LSTM; - if (str == "gru") return Architecture::GRU; - if (str == "embedding") return Architecture::EMBEDDING; - throw std::logic_error("architecture " + str + " not recognized"); - } - - void set_defaults(LayerConfig& layer) { - layer.activation = Activation::NONE; - layer.inner_activation = Activation::NONE; - layer.architecture = Architecture::NONE; - } - - void add_dense_info(LayerConfig& layer, const ptree::value_type& v) { - for (const auto& wt: v.second.get_child("weights")) { - layer.weights.push_back(wt.second.get_value<double>()); - } - for (const auto& bs: v.second.get_child("bias")) { - layer.bias.push_back(bs.second.get_value<double>()); - } - // this last category is currently only used for LSTM - if (v.second.count("U") != 0) { - for (const auto& wt: v.second.get_child("U") ) { - layer.U.push_back(wt.second.get_value<double>()); - } - } - - if (v.second.count("activation") != 0) { - layer.activation = get_activation( - v.second.get<std::string>("activation")); - } - - } - - void add_maxout_info(LayerConfig& layer, const ptree::value_type& v) { - using namespace lwt; - for (const auto& sub: v.second.get_child("sublayers")) { - LayerConfig sublayer; - set_defaults(sublayer); - add_dense_info(sublayer, sub); - layer.sublayers.push_back(sublayer); - } - } - - - const std::map<std::string, lwt::Component> component_map { - {"i", Component::I}, - {"o", Component::O}, - {"c", Component::C}, - {"f", Component::F}, - {"z", Component::Z}, - {"r", Component::R}, - {"h", Component::H}, - {"t", Component::T}, - {"carry", Component::CARRY} - }; - - void add_component_info(LayerConfig& layer, const ptree::value_type& v) { - using namespace lwt; - for (const auto& comp: v.second.get_child("components")) { - LayerConfig cfg; - set_defaults(cfg); - add_dense_info(cfg, comp); - layer.components[component_map.at(comp.first)] = cfg; - } - layer.activation = get_activation( - v.second.get<std::string>("activation")); - if (v.second.count("inner_activation") != 0) { - layer.inner_activation = get_activation( - v.second.get<std::string>("inner_activation")); - } - } - - - void add_embedding_info(LayerConfig& layer, const ptree::value_type& v) { - using namespace lwt; - for (const auto& sub: v.second.get_child("sublayers")) { - EmbeddingConfig emb; - for (const auto& wt: sub.second.get_child("weights")) { - emb.weights.push_back(wt.second.get_value<double>()); - } - emb.index = sub.second.get<int>("index"); - emb.n_out = sub.second.get<int>("n_out"); - layer.embedding.push_back(emb); - } - } - -}