Skip to content
Snippets Groups Projects
Commit bbb5bdd1 authored by Christoph Hasse's avatar Christoph Hasse :cartwheel_tone1:
Browse files

feat: introduce a new implementation of IFactory which uses the project...

feat: introduce a new implementation of IFactory which uses the project compiler to jit compile functors
parent c9901b08
No related branches found
No related tags found
1 merge request!2699FunctorFactory, replace CLING backend with native compiler
......@@ -47,6 +47,7 @@ gaudi_add_module(FunctorCore
SOURCES
src/Components/ExampleAlg.cpp
src/Components/Factory.cpp
src/Components/GigaFactory.cpp
LINK
Boost::headers
FunctorCoreLib
......@@ -78,3 +79,22 @@ gaudi_install(PYTHON)
gaudi_add_tests(QMTest)
gaudi_add_tests(pytest ${CMAKE_CURRENT_SOURCE_DIR}/python)
# Generate the `functor_jitter` wrapper script. FunctorGigaFactory invokes it at
# run time so that functors are compiled with the same compiler, the same
# CMAKE_CXX_FLAGS* and the same directory-level compile definitions as this
# project.
set(JIT_CMD_FILE "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_BINDIR}/functor_jitter")
message(STATUS "Generating JIT Wrapper at: ${JIT_CMD_FILE}")

# Quote the argument: if CMAKE_BUILD_TYPE is empty the unquoted form expands to
# nothing and string(TOUPPER) aborts with a wrong-number-of-arguments error.
string(TOUPPER "${CMAKE_BUILD_TYPE}" CMAKE_BUILD_TYPE_UPPER)

# Turn the directory's COMPILE_DEFINITIONS into a *space* separated string of
# -D flags. Interpolating a CMake list into the script would join the entries
# with ';', which the shell interprets as a command separator.
get_property(tmp DIRECTORY PROPERTY COMPILE_DEFINITIONS)
set(defines "")
foreach(item IN LISTS tmp)
  string(APPEND defines " -D${item}")
endforeach()

# FIXME how the hell does one get a complete list of flags!??!?!? :/
file(WRITE "${JIT_CMD_FILE}" "#!/bin/sh\n# Auto-generated script to create a jitter for the FunctorGigaFactory\nexec ${CMAKE_CXX_COMPILER} ${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -fPIC -shared${defines} \"$@\"\n")
execute_process(COMMAND chmod a+x "${JIT_CMD_FILE}")
/*****************************************************************************\
* (c) Copyright 2019-20 CERN for the benefit of the LHCb Collaboration *
* *
* This software is distributed under the terms of the GNU General Public *
* Licence version 3 (GPL Version 3), copied verbatim in the file "COPYING". *
* *
* In applying this licence, CERN does not waive the privileges and immunities *
* granted to it by virtue of its status as an Intergovernmental Organization *
* or submit itself to any jurisdiction. *
\*****************************************************************************/
#include "Functors/Cache.h"
#include "Functors/FunctorDesc.h"
#include "Functors/IFactory.h"
#include "GaudiKernel/Service.h"
#include "boost/algorithm/string/erase.hpp"
#include "boost/algorithm/string/predicate.hpp"
#include "boost/algorithm/string/replace.hpp"
#include "boost/algorithm/string/split.hpp"
#include "boost/algorithm/string/trim.hpp"
#include "boost/format.hpp"
#include "boost/lexical_cast.hpp"
#include <dlfcn.h>
#include <fstream>
#include <limits>
#include <set>
#include <stdio.h>
#include <vector>
/** @file GigaFactory.cpp
 * @brief Definitions of non-templated functor factory functions.
 */
namespace {
/** Write a single `#include` directive for @p header to @p os.
 *
 * A header name that is already wrapped in angle brackets is written as-is;
 * any other name is wrapped in double quotes.
 *
 * @param os     Stream receiving the directive (terminated by a newline).
 * @param header Header name; must not be empty.
 * @return       @p os, to allow chaining.
 * @throws GaudiException if @p header is empty.
 */
std::ostream& include( std::ostream& os, std::string_view header ) {
  if ( header.empty() ) { throw GaudiException{"Got empty header name", "FunctorGigaFactory", StatusCode::FAILURE}; }
  bool const already_bracketed = header.front() == '<' && header.back() == '>';
  os << "#include ";
  if ( already_bracketed ) {
    os << header;
  } else {
    os << '"' << header << '"';
  }
  os << '\n';
  return os;
}
/** Derive the default base name of the generated functor cache sources from
 *  a component name.
 *
 * A leading "ToolSvc." is dropped, every '.' and ' ' becomes '_', every "::"
 * becomes "__", and the result is prefixed with "FUNCTORS_".
 *
 * @param cppname Component name to convert (taken by value and modified).
 * @return        The converted name.
 */
std::string make_default_cppname( std::string cppname ) {
  constexpr std::string_view toolsvc_prefix{"ToolSvc."};
  if ( cppname.compare( 0, toolsvc_prefix.size(), toolsvc_prefix ) == 0 ) {
    cppname.erase( 0, toolsvc_prefix.size() );
  }
  for ( char& ch : cppname ) {
    if ( ch == '.' || ch == ' ' ) { ch = '_'; }
  }
  boost::algorithm::replace_all( cppname, "::", "__" );
  return "FUNCTORS_" + cppname;
}
std::unique_ptr<std::ostream> openFile( std::string namebase, unsigned short findex,
std::vector<std::string> const& lines,
std::vector<std::string> const& headers ) {
// construct the file name
// 1) remove trailing .cpp
if ( boost::algorithm::ends_with( namebase, ".cpp" ) ) boost::algorithm::erase_tail( namebase, 4 );
// 2) replace blanks by underscore
std::replace( namebase.begin(), namebase.end(), ' ', '_' );
// 3) construct the name
boost::format fname( "%s_%04d.cpp" );
fname % namebase % findex;
auto file = std::make_unique<std::ofstream>( fname.str() );
// write the include directives
*file << "// Generic statements\n";
for ( const auto& l : lines ) { *file << l << '\n'; }
*file << "// Explicitly declared include files:\n";
for ( const auto& h : headers ) { include( *file, h ); }
return file;
}
/** Write out the entry in a generated functor cache .cpp file for a single functor.
*/
std::ostream& makeCode( std::ostream& stream, Functors::Cache::HashType hash, std::string_view type,
std::string_view code ) {
boost::format declareFactory{R"_(namespace {
using t_%1% = %2%;
Gaudi::PluginService::DeclareFactory<t_%1%> functor_%1%{
Functors::Cache::id( %1% ),
[]() -> std::unique_ptr<Functors::AnyFunctor> {
return std::make_unique<t_%1%>(
%3%
);
}
};
}
)_"};
return stream << ( declareFactory % boost::io::group( std::showbase, std::hex, hash ) % type % code );
}
} // namespace
/** @class FunctorGigaFactory
 *
 * This service does all the heavy lifting behind compiling functors into
 * type-erased Functor<Out(In)> objects. It can do this either by
 * just-in-time compiling them with the project's native compiler (through
 * the CMake-generated `functor_jitter` wrapper script) and loading the
 * resulting shared library, or by using a pre-compiled functor cache. The
 * service is also responsible for generating the code that produces the
 * precompiled functor cache. It is heavily inspired by Vanya Belyaev's
 * LoKi::Hybrid::Base.
 */
struct FunctorGigaFactory : public extends<Service, Functors::IFactory> {
using extends::extends;
// pointer-to-function type we use when JIT compiling
using factory_function_ptr = functor_base_t ( * )();
/** @brief Retrieve the functor described by @p desc and bind it to @p owner.
 *
 * The functor is looked up in the pre-compiled functor cache first (if
 * enabled) and otherwise JIT compiled: a small translation unit is written to
 * a temporary directory, compiled into a shared library by the
 * `functor_jitter` wrapper script and loaded with dlopen/dlsym. Factory
 * functions obtained this way are remembered per hash and reused.
 *
 * @param owner        Algorithm the functor is bound to.
 * @param functor_type C++ type of the functor, e.g. Functor<Out(In)>.
 * @param desc         Functor description (code, headers, representation).
 * @param compile      Flags selecting cache/JIT usage and whether a failure
 *                     to produce a functor is an error.
 * @return             The functor, or an empty pointer if none could be
 *                     obtained and no exception was requested.
 * @throws GaudiException if the JIT source cannot be written, the compiler
 *         cannot be started or fails, the library or its factory symbol
 *         cannot be loaded, or no functor was obtained while
 *         ExceptionOnFailure was requested and cache or JIT was enabled.
 */
functor_base_t get_impl( Gaudi::Algorithm* owner, std::string_view functor_type, ThOr::FunctorDesc const& desc,
                         CompilationBehaviour compile ) override {
  // Combine the 'compile' argument with the global settings to determine
  // what compilation methods we should try
  bool const fail_hard = compile & ExceptionOnFailure;
  bool const use_jit{this->m_use_jit && ( compile & TryJIT )};
  bool const use_cache{this->m_use_cache && ( compile & TryCache )};

  // Prepare the string that fully specifies the functor we want to retrieve -- basically the combination of
  // input type, output type, functor string
  // First, sort and de-duplicate the headers
  auto headers = desc.headers;
  std::sort( headers.begin(), headers.end() );
  headers.erase( std::unique( headers.begin(), headers.end() ), headers.end() );

  if ( msgLevel( MSG::DEBUG ) ) {
    debug() << "Decoding " << desc.code << endmsg;
    debug() << "With extra headers:";
    for ( auto const& header_line : headers ) { debug() << " " << header_line; }
    debug() << endmsg;
  }

  // FIXME it seems that having a quoted string in the middle of the
  // string adds quotes at the start/end...need Gaudi!919
  // Strip one leading and one trailing double quote. The emptiness/size guards
  // keep front()/back() from being called on an empty string and stop a lone
  // '"' from being counted as both the leading and the trailing quote.
  std::string_view const raw_code{desc.code};
  std::size_t const      findex{!raw_code.empty() && raw_code.front() == '"'};
  std::size_t const      trailing_quote{raw_code.size() > findex && raw_code.back() == '"'};
  auto                   trimmed_code = raw_code.substr( findex, raw_code.size() - findex - trailing_quote );

  // This is basically Functor<Out(In)>( PTCUT( ... ) ... )
  std::string full_code{functor_type};
  full_code.append( "( " );
  full_code.append( trimmed_code );
  full_code.append( " )" );

  // Now we can calculate the hash
  const auto hash = Functors::Cache::makeHash( full_code );

  if ( msgLevel( MSG::VERBOSE ) ) {
    verbose() << "Full string for hash: " << full_code << endmsg;
    verbose() << "Resulting hash is " << std::hex << std::showbase << hash << endmsg;
  }

  // The object we'll eventually return
  functor_base_t functor;

  // See if we can magically load the functor from the cache (Gaudi magic!)
  // Don't bother trying if we were told not to
  if ( use_cache ) {
    functor = ::Gaudi::PluginService::Factory<Functors::AnyFunctor*()>::create( Functors::Cache::id( hash ) );
    if ( functor ) {
      functor->setJITCompiled( false );
    } else if ( !functor && !use_jit ) {
      // We print a different INFO message below if use_jit is true
      info() << "Cache miss for functor: " << trimmed_code << endmsg;
    }
  }

  // Shorthand for throwing an informative exception
  auto exception = [functor_type]( auto const& name ) {
    std::string ourname{"FunctorGigaFactory::get<"};
    ourname.append( functor_type );
    ourname.append( ">" );
    return GaudiException{name, std::move( ourname ), StatusCode::FAILURE};
  };

  // dlerror() may return a null pointer, which must not be turned into a
  // std::string; fall back to a message of our own in that case.
  auto dl_error = []( std::string fallback ) {
    char const* message = dlerror();
    return message ? std::string{message} : fallback;
  };

  if ( !functor && use_jit ) {
    // See if we already JIT compiled this functor and can therefore reuse
    // the factory function that we got before
    auto iter = m_factories.find( hash );
    if ( iter != m_factories.end() ) {
      // We had already JIT compiled this functor
      info() << "Reusing jit compiled factory for functor: " << trimmed_code << endmsg;
      auto factory_function = iter->second;
      functor               = factory_function();
      // don't set jit compiled, that is only a hack for cling crap
      // functor->setJITCompiled( true );
    } else {
      // Need to actually do the JIT compilation
      if ( use_cache ) {
        info() << "Cache miss for functor: " << trimmed_code << ", now trying JIT with headers " << headers << endmsg;
      } else {
        info() << "Using compiler for functor: " << trimmed_code << " with headers " << headers << endmsg;
      }

      // The expression we ask the compiler to compile is not quite the same as
      // 'full_code'. Instead of Functor<Out(In)>( PT > ... ) we ask it to
      // compile the declaration of a function returning functor_base_t that
      // takes no arguments. We then ask dlsym to give us the address of
      // this function and call it ourselves. This looks like:
      // functor_base_t functor_0xdeadbeef() {
      //   return std::make_unique<Functor<Out(In)>>( PT > ... );
      // }

      // FIXME
      // 1. factor out the shared code with the other Factory
      //
      // 2. what to do about dlclose? technically as long as this service
      //    lives there could be someone who comes and asks again for a
      //    functor we already opened, so we shouldn't really dlclose before
      //    finalize ? But at finalize we will quit anyways so I guess for
      //    now I'll just leak :D Other option to avoid uncontrolled growing
      //    of memory would be to set a maximum of allowed open handles?
      //    We could implement some reference counting to keep track if a
      //    library is still in use and otherwise close it.
      //
      // 3. right now we create 1 lib per functor, can we somehow cluster
      //    this?
      // FIXME

      std::ostringstream code;

#ifdef USE_DD4HEP
      code << "#define USE_DD4HEP\n";
#endif

      // Include the required headers
      for ( auto const& header : headers ) { include( code, header ); }

      // Get the name for the factory function. Add a suffix to avoid it
      // matching the cache entries.
      auto function_name = Functors::Cache::id( hash ) + "_jit";

      // extern "C" keeps the symbol name unmangled so dlsym can find it
      code << "extern \"C\" {\n";
      // Declare the factory function
      code << functor_base_t_str << " " << function_name << "() { return std::make_unique<" << functor_type << ">( "
           << trimmed_code << " ); }\n";
      code << "}";

      if ( msgLevel( MSG::VERBOSE ) ) { verbose() << "Full code to JIT is:\n" << code.str() << endmsg; }

      // Turn the ':' separated ROOT_INCLUDE_PATH into a list of -isystem flags
      auto all_includes = "-isystem" + System::getEnv( "ROOT_INCLUDE_PATH" );
      boost::replace_all( all_includes, ":", " -isystem" );

      auto tmp_dir = System::getEnv( "FUNCTOR_JIT_TMPDIR" );
      // if FUNCTOR_JIT_TMPDIR defined make sure we have a "/" at the end
      // if not, default to "/tmp/"
      tmp_dir = ( tmp_dir == "UNKNOWN" ) ? "/tmp/" : tmp_dir + "/";

      auto const file_prefix  = tmp_dir + function_name;
      auto const cpp_filename = file_prefix + ".cpp";
      auto const lib_filename = file_prefix + ".so";
      {
        std::ofstream out( cpp_filename );
        out << code.str();
        out.close();
        // Report an unwritable temporary directory directly instead of
        // letting the compiler fail on a missing or truncated source file.
        if ( !out ) { throw exception( "Couldn't write JIT source file " + cpp_filename ); }
      }

      // functor_jitter is a shell script generated by cmake to invoke the correct compiler with the correct flags
      // see Phys/FunctorCore/CMakeLists.txt
      auto cmd = "functor_jitter -o " + lib_filename + " " + all_includes + " " + cpp_filename + " 2>&1";

      if ( msgLevel( MSG::VERBOSE ) ) { verbose() << "command passed to popen:\n" << cmd << endmsg; }

      FILE* pipe = popen( cmd.c_str(), "r" );
      if ( pipe == nullptr ) { throw exception( "Couldn't start command." ); }

      // Collect the compiler output (stderr is redirected to stdout above) so
      // that it can be attached to the exception on failure
      const size_t                  buffer_size = 128;
      std::array<char, buffer_size> buffer{};
      std::string                   cmd_out;
      while ( fgets( buffer.data(), buffer_size, pipe ) != nullptr ) { cmd_out += buffer.data(); }

      auto returnCode = pclose( pipe );
      if ( returnCode != 0 ) { throw exception( "Non zero return code!\n" + cmd_out ); }

      void* lib = dlopen( lib_filename.c_str(), RTLD_LOCAL | RTLD_LAZY );
      if ( lib == nullptr ) { throw exception( dl_error( "dlopen failed for " + lib_filename ) ); }

      auto factory_function = reinterpret_cast<factory_function_ptr>( dlsym( lib, function_name.c_str() ) );
      if ( factory_function == nullptr ) {
        // Fetch the message before dlclose, then release the useless handle
        auto const message = dl_error( "Symbol " + function_name + " not found in " + lib_filename );
        dlclose( lib );
        throw exception( message );
      }

      // Save the factory function pointer
      m_factories.emplace( hash, factory_function );

      // Use the JITted factory function
      functor = factory_function();
      functor->setJITCompiled( true );
    }
  }

  if ( functor ) {
    functor->bind( owner );
  } else if ( fail_hard && ( use_cache || use_jit ) ) {
    // Don't emit too many messages while generating the functor caches. In
    // that case both JIT and the cache are disabled, so we will never
    // actually retrieve a functor here.
    std::string error_message{"Couldn't load functor using ["};
    if ( use_cache && use_jit ) {
      error_message += "cache, JIT";
    } else if ( use_cache ) {
      error_message += "cache";
    } else {
      error_message += "JIT";
    }
    error_message += "]: " + desc.repr;
    throw exception( error_message );
  }

  // If we're going to write out the .cpp files for creating the functor cache when we finalise then we need to
  // store the relevant data in an internal structure
  if ( this->m_makeCpp ) {
    // Store the functor alongside others with the same headers
    m_functors[std::move( headers )].emplace( hash, functor_type, trimmed_code );
  }

  return functor;
}
/** Finalise the service.
 *
 * When the MakeCpp property is enabled the functor cache source files are
 * written first; the base class finalisation then runs in any case.
 *
 * @return The status code of Service::finalize().
 */
StatusCode finalize() override {
  bool const generate_sources = m_makeCpp;
  if ( generate_sources ) { writeCpp(); }
  return Service::finalize();
}
protected:
// Hash identifying a functor; computed from "<functor type>( <code> )" in get_impl
using HashType = Functors::Cache::HashType;
// Factory functions obtained from JIT compiled libraries, keyed by functor hash
using FactoryCache = std::map<HashType, factory_function_ptr>;
// Functors grouped by their sorted, de-duplicated header list; each group becomes one generated .cpp file
using FunctorSet = std::map<std::vector<std::string>, std::set<std::tuple<HashType, std::string, std::string>>>;
// Filled by get_impl only when MakeCpp is enabled, consumed by writeCpp
FunctorSet m_functors; // {headers: [{hash, type, code}, ...]}
// Filled by get_impl after each successful JIT compilation so the factory can be reused
FactoryCache m_factories; // {hash: factory function pointer, ...}
/** @brief Generate the functor cache .cpp files.
 *
 * In order to expose as many bugs as possible, make sure that we generate a
 * different .cpp file for every set of requested headers, so every functor
 * is compiled with *exactly* the headers that were requested.
 *
 * The number of source files the build system expects is read from the
 * LOKI_GENERATE_CPPCODE environment variable; files beyond those needed are
 * written with only the fixed preamble comments and no functors.
 *
 * @throws GaudiException if LOKI_GENERATE_CPPCODE is negative or larger than
 *         the largest supported file index, if more files are needed than it
 *         allows, or if a file cannot be written by openFile().
 */
void writeCpp() const {
  /** The LoKi meaning of this parameter was:
   *  - positive: write N-files
   *  - negative: write N-functors per file
   *  - zero    : write one file
   *  currently it is not fully supported, we just use it to check that CMake
   *  is aware of at least as many source files as the minimum we need.
   *
   *  @todo When split > m_functors.size() then split the functors across
   *        more files until we are writing to all 'split' available source
   *        files.
   */
  // File indices are 'unsigned short' (see openFile), so that is the largest
  // number of files that can be addressed.
  constexpr long long max_files = std::numeric_limits<unsigned short>::max();
  // Parse as a signed number: converting straight to an unsigned type makes
  // boost wrap negative input around to a huge value, and any value above
  // max_files would make the padding loop below spin forever because the
  // 16-bit file index wraps before reaching it.
  long long requested{0};
  if ( !boost::conversion::try_lexical_convert( System::getEnv( "LOKI_GENERATE_CPPCODE" ), requested ) ) {
    requested = 0;
  }
  if ( requested < 0 || requested > max_files ) {
    throw GaudiException{"LOKI_GENERATE_CPPCODE must be between 0 and " + std::to_string( max_files ) +
                             ", but was set to " + std::to_string( requested ),
                         "FunctorGigaFactory", StatusCode::FAILURE};
  }
  auto const split = static_cast<std::size_t>( requested );

  if ( m_functors.size() > split ) {
    throw GaudiException{"Functor factory needs to generate at least " + std::to_string( m_functors.size() ) +
                             " source files, but LOKI_GENERATE_CPPCODE was set to " + std::to_string( split ) +
                             ". Increase the SPLIT setting in the call to loki_functors_cache() to at least " +
                             std::to_string( m_functors.size() ),
                         "FunctorGigaFactory", StatusCode::FAILURE};
  }

  /** We write one file for each unique set of headers
   */
  unsigned short ifile{0};
  for ( auto const& [headers, func_set] : m_functors ) {
    std::unique_ptr<std::ostream> file;
    unsigned short                iwrite{0};
    for ( auto const& [hash, functor_type, brief_code] : func_set ) {
      // Open the file lazily, on the first functor of this header set
      if ( !file ) { file = openFile( m_cppname, ++ifile, m_cpplines, headers ); }

      *file << '\n' << std::dec << std::noshowbase << "// FUNCTOR #" << ++iwrite << "/" << func_set.size() << '\n';

      // write actual C++ code
      ::makeCode( *file, hash, functor_type, brief_code );

      *file << '\n';
    }
  }

  // Make sure the remaining files are empty. This ensures generated code
  // from previous builds (with more functors) is overwritten and does not
  // interfere with the new build.
  while ( ifile < split ) openFile( m_cppname, ++ifile, {}, {} );
}
// Flags to steer the use of JIT and the functor cache.
// Each default is derived from an environment variable by comparing
// System::getEnv() against "UNKNOWN" (presumably the value returned for an
// unset variable -- confirm against Gaudi's System::getEnv).
// UseCache: defaults to true when LOKI_DISABLE_CACHE yields "UNKNOWN"
Gaudi::Property<bool> m_use_cache{this, "UseCache", System::getEnv( "LOKI_DISABLE_CACHE" ) == "UNKNOWN"};
// UseJIT: defaults to true when LOKI_DISABLE_CLING yields "UNKNOWN"
Gaudi::Property<bool> m_use_jit{this, "UseJIT", System::getEnv( "LOKI_DISABLE_CLING" ) == "UNKNOWN"};
// MakeCpp: defaults to true when LOKI_GENERATE_CPPCODE yields anything other than "UNKNOWN";
// when set, get_impl records every functor and finalize() writes the cache sources
Gaudi::Property<bool> m_makeCpp{this, "MakeCpp", System::getEnv( "LOKI_GENERATE_CPPCODE" ) != "UNKNOWN"};
// Properties steering the generated functor cache code
// CppFileName: base name of the generated files, derived from the service name by default
Gaudi::Property<std::string> m_cppname{this, "CppFileName", make_default_cppname( this->name() )};
// CppLines: statements written verbatim at the top of every generated file
Gaudi::Property<std::vector<std::string>> m_cpplines{this, "CppLines", {"#include \"Functors/Cache.h\""}};
};
DECLARE_COMPONENT( FunctorGigaFactory )
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment