// File: Database/APR/CollectionUtilities/CollectionUtilities/ArgQual.h
/*
  Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
*/

#ifndef UTILITIES_COLLECTION_ARGQUAL
#define UTILITIES_COLLECTION_ARGQUAL

#include <string>
#include <iostream>
#include <sstream>
#include <vector>
#include <map>
#include <algorithm>

/**********************************************************

ArgQual is a class version of a struct

Purpose: Contain all information on the state of
         input for a command line argument qualifier

      argsize:  cardinality of the qualifier arguments
      multiarg: whether there can be multiple units
                of the cardinality
      required: whether the argument is required
      desc:     a string stream with a readable description
                of the qualifier and its defaults

Example of Usage with CmdLineArgs2:

      Args2Container argsVec(thisProgram);

      QualList markers;
      markers.insert( make_pair("-somequal", ArgQual()) );
      markers["-somequal"].desc << "this is a qualifier with default=this";

      CmdLineArgs2 cmdLineArgs;
      cmdLineArgs.setArgQuals(markers);
      cmdLineArgs.evalArgs(argc,argv);
      argsVec.push_back(&cmdLineArgs);  // Add it to the list

      ...

      % someProgram.exe -somequal thisval

**********************************************************/
namespace pool
{
   class ArgQual
   {
   public:
      /// Default: one argument, not repeatable, not required, empty description.
      ArgQual() : argsize(1),multiarg(false),required(false) {}

      /// @param i cardinality of the qualifier arguments
      /// @param b whether several units of that cardinality may be given
      /// @param r whether the qualifier is required
      ArgQual(int i, bool b=false, bool r=false) : argsize(i),
                                                   multiarg(b),
                                                   required(r)
      {}

      /// Same as above, with the description text @p d written into desc.
      ArgQual(std::string d, int i, bool b=false, bool r=false)
            : argsize(i),
              multiarg(b),
              required(r)
      {
         desc << d;
      }

      /// Copy constructor. std::stringstream is not copyable, so the
      /// description is transferred as text.
      ArgQual(const ArgQual& a)
            : argsize(a.argsize),
              multiarg(a.multiarg),
              required(a.required)
      {
         desc << a.desc.str();
      }

      /// Copy assignment: replaces (does not append to) the description.
      ArgQual& operator =(const ArgQual& r) {
         if( this != &r ) {
            argsize  = r.argsize;
            multiarg = r.multiarg;
            required = r.required;
            // clear() only resets the stream state flags; the buffer has to
            // be emptied explicitly or the old text would be kept and the
            // new text appended to it.
            desc.str( std::string() );
            desc.clear();
            // Written with << so the put position ends up after the text
            // and later "desc << ..." calls keep appending.
            desc << r.desc.str();
         }
         return *this;
      }

      int argsize;             ///< cardinality of the qualifier arguments
      bool multiarg;           ///< multiple units of the cardinality allowed
      bool required;           ///< qualifier must be present
      std::stringstream desc;  ///< readable description of the qualifier

      // Declared here only; no definition is visible in this header.
      void print(std::ostream&) const;

      friend std::ostream& operator<<(std::ostream& theStream, ArgQual& rhs);
   };

   /// Qualifier name (e.g. "-somequal") -> its ArgQual.
   typedef std::map<std::string,ArgQual> QualList;
}

#endif  // UTILITIES_COLLECTION_ARGQUAL
+ +printHelp(verbose) : + - (verbose = false) print out names of ArgQuals + for all contained pointers + - (verbose = true) print out names _and_ desc of ArgQuals + for all contained pointers + +checkValid() : Loop over pointers and call valid() method + +evalArgs(argc,argv) : save time/errors by calling evalArgs + on the container rather than the individual CmdLineArg2's + +bool doXML() : whether to write out an xml file with the argv + results classified by CmdLineArgs2 + +WIP pieces for XML reading/state recovery +------------------------------------------- +void useXMLInput(Args2Container::XMLOption) + +XMLOption actions: + + OVERRIDE (NI): evalArgs + parse XML + Pt CLA2 in XML at non-argv + APPEND (NI): evalArgs + parse XML + if CLA2 missing pt it at non-argv + FILEONLY: parse XML + pt CLA2 in XML at non-argv + +* Note that the minimum unit is CLA2, e.g. you can't take -src +from the command line and -srcconnect from XML for SrcInfo + +***************************************************************/ + +#include "CoralBase/MessageStream.h" + +namespace pool +{ + class Args2Container : public std::vector<CmdLineArgs2*> + { + public: + enum XMLVersion {V1=0,V2,NUM_VERSIONS}; + enum XMLOption {FILEONLY=0,OVERRIDE,APPEND}; + + Args2Container(std::string name="Args2Container", + bool doxml = false, + coral::MessageStream* log = 0); + + void init(); + + void printHelp(bool verbose) const; + bool checkValid() const; + + // Pass argv to all cliarg's + bool evalArgs(std::vector<std::string>& argv); + bool evalArgs( int argc, const char *argv[] ); + + // Pass argv to specified cliarg + bool evalArgs(const std::string cliarg, std::vector<std::string>& argv); + + bool doXML() const {return m_xml;} + void setXMLOption(Args2Container::XMLOption a) { + m_xmlOpt = a; + m_xml=true; // if they're setting it, assume they're using it. 
+ } + + std::stringstream desc; + + coral::MessageStream &m_log; + + private: + + // input xml methods + std::vector<std::string> fillCmdLineArgsFromXML(std::string file); + + // output xml methods + void writeXMLContent(std::vector<std::string>& argv); + + std::string m_name; + bool m_xml; + XMLOption m_xmlOpt; + XMLVersion m_ver; + CmdLineArgs2 m_a2c_cla2; + + std::string m_xFileName; + + std::vector< std::string > m_argv; + }; + +}// end pool namespace + +#endif // UTILITIES_COLLECTION_ARGS2CONTAINER + + + + + + + + + diff --git a/Database/APR/CollectionUtilities/CollectionUtilities/CatalogInfo.h b/Database/APR/CollectionUtilities/CollectionUtilities/CatalogInfo.h new file mode 100644 index 0000000000000000000000000000000000000000..ac9b23a7fd3cfbe87339f42603b93276ba944fe0 --- /dev/null +++ b/Database/APR/CollectionUtilities/CollectionUtilities/CatalogInfo.h @@ -0,0 +1,56 @@ +/* + Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration +*/ + +#ifndef UTILITIES_COLLECTION_CATALOGINFO +#define UTILITIES_COLLECTION_CATALOGINFO + +#include "CollectionUtilities/ArgQual.h" +#include "CollectionUtilities/CmdLineArgs2.h" + +/********************************************************** + +CatalogInfo is an concretization of a CmdLineArgs2 + to contain catalog info. 
+ +Qualifiers: -ccread, -fcread + +**********************************************************/ + +namespace pool +{ + class ICollectionService; + class IFileCatalog; + + + class CatalogInfo : public CmdLineArgs2 + { + public: + + /// Constructors + CatalogInfo( ); + + virtual ~CatalogInfo() {} + + /// + bool evalArgs(std::vector<std::string>& argv); + + std::string collCatalogN(unsigned int i); + std::string fileCatalogN(unsigned int i); + bool useCC() {return m_valid;} + + void setCatalogs( pool::ICollectionService* ); + void setFileCatalogs( pool::IFileCatalog* ); + + private: + bool m_valid; + std::vector<std::string> m_collCatalogReadVec; + std::vector<std::string> m_fileCatalogReadVec; + + }; + +} //end pool namespace + +#endif // UTILITIES_COLLECTION_CATALOGINFO + + diff --git a/Database/APR/CollectionUtilities/CollectionUtilities/CmdLineArgs.h b/Database/APR/CollectionUtilities/CollectionUtilities/CmdLineArgs.h new file mode 100644 index 0000000000000000000000000000000000000000..06e866d780cb564d948d898cdaf7dcf1b1a6cbeb --- /dev/null +++ b/Database/APR/CollectionUtilities/CollectionUtilities/CmdLineArgs.h @@ -0,0 +1,96 @@ +/* + Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration +*/ + +#ifndef UTILITIES_COLLECTION_CMDLINEARGS +#define UTILITIES_COLLECTION_CMDLINEARGS + +#include <string> +#include <vector> +#include <algorithm> + +namespace pool +{ + + class CmdLineArgs + { + public: + + /// Constructor. + CmdLineArgs( int argc, const char* argv[] ); + + /// Check if argument is in command line. + bool hasQual( const std::string arg ) const; + + /// Get argument from command line using qualifier. + std::string getArg( const std::string arg ); + + /// Get argument from command line using position. + std::string getArg( std::vector<std::string>::const_iterator itr ); + + /// Get position on command line. 
+ std::vector<std::string>::const_iterator getPos() const; + + private: + + std::vector<std::string> argvVector; + std::vector<std::string>::const_iterator vecItr; + }; + + + inline CmdLineArgs::CmdLineArgs( int argc, const char* argv[] ) + : argvVector(argc) + { + for( int i=0; i < argc; ++i ) + argvVector[i].assign( argv[i] ); + vecItr = argvVector.begin(); + } + + + inline bool CmdLineArgs::hasQual( const std::string arg ) const + { + return ( std::find( argvVector.begin(), argvVector.end(), arg ) != + argvVector.end() ); + } + + + inline std::string CmdLineArgs::getArg( const std::string arg ) + { + std::vector<std::string>::const_iterator itr + = std::find( argvVector.begin(), argvVector.end(), arg ); + return ( getArg( itr ) ); + } + + + inline std::string CmdLineArgs::getArg( + std::vector<std::string>::const_iterator itr ) + { + vecItr = itr; + if ( vecItr != argvVector.end() ) + { + vecItr++; + if ( vecItr != argvVector.end() ) + { + return *vecItr; + } + else + { + return (""); + } + } + else + { + return (""); + } + } + + + inline std::vector<std::string>::const_iterator CmdLineArgs::getPos() const + { + return vecItr; + } + +} // end pool namespace + +#endif // UTILITIES_COLLECTION_CMDLINEARGS + diff --git a/Database/APR/CollectionUtilities/CollectionUtilities/CmdLineArgs2.h b/Database/APR/CollectionUtilities/CollectionUtilities/CmdLineArgs2.h new file mode 100644 index 0000000000000000000000000000000000000000..f3fab94812bd30f2a8ed76deffb35ac5350b66bd --- /dev/null +++ b/Database/APR/CollectionUtilities/CollectionUtilities/CmdLineArgs2.h @@ -0,0 +1,126 @@ +/* + Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration +*/ + +#ifndef UTILITIES_COLLECTION_CMDLINEARGS2 +#define UTILITIES_COLLECTION_CMDLINEARGS2 + +#include <string> +#include <vector> +#include <algorithm> +#include <map> +#include "CollectionUtilities/ArgQual.h" + +/********************************************************** + +CmdLineArgs2 is an extension of a map intended to 
map + qualifiers (e.g. -help) of a CLI to the index of + the argv[]. It's state is determined by a list of + ArgQual's/QualList. It then uses evalArgs to apply + those qualifiers to an argv[]. + + - Qualifier -help is added in constructor + - methods described in comments in definition + +Example of Usage: + + Args2Container argsVec(thisProgram); + + QualList markers; + markers.insert( make_pair("-somequal", ArgQual()) ); + markers["-somequal"].desc << "this is a qualifier with default=this"; + + CmdLineArgs2 cmdLineArgs; + cmdLineArgs.setArgQuals(markers); + cmdLineArgs.evalArgs(argc,argv); + argsVec.push_back(&cmdLineArgs); // Add it to the list + + ... + +**********************************************************/ + +namespace pool +{ + class Args2Container; + + class CmdLineArgs2 : public std::map<std::string,std::pair<int,int> > + { + public: + /// Constructor. + CmdLineArgs2( std::string name="General" ); + CmdLineArgs2( QualList&, std::string name="General"); + + virtual ~CmdLineArgs2() {} + + /// init + virtual void init(); + + /// Check if argument was found in the argv[] + bool hasQual( const std::string arg ) const; + + /// check if option was present and return its (first) value in optval(!) + bool getOpt( const std::string opt, std::string &optval ) const; + + /// check if option was present and return its (first) numerical value in optval (!) + bool getOpt( const std::string opt, int &optval ) const; + + /// check if option was present and return its values in optvalvec(!) 
+ bool getOpts( const std::string opt, std::vector<std::string> &optvalvec ) const; + + /// Apply the criteria in the QualList to the argv[] + virtual bool evalArgs(std::vector<std::string>& argv); + + /// Second pass options processing + virtual bool evalArgsPass2(std::vector<std::string>&, pool::Args2Container&) { return true; } + + /// Returns true if evalArgs has been called with no errors + bool valid() const {return m_argsfine;} + + /// identifier + std::string name() const {return m_id;} + + /// + ArgQual& getArgQual(const std::string a) {return m_quals[a];} + QualList& getArgQuals( ) {return m_quals;} + + /// Hack to bring in allowed types of Collections + std::vector<std::string>& allowedTypes() {return m_allowedTypes;} + + /// Number of arguments for a given qualifer, e.g. nArgs("-src") + unsigned int nArgs(std::string); + + /// set the QualList + void setArgQuals( QualList& quals) {m_quals=quals;} + + /// set flag to suppress error messages about missing arguments e.g. when -xmlInput is present + void ignoreMissingArgs( bool ignore ) { m_ignoreMissingArgs = ignore; } + + protected: + + bool m_argsfine; + bool m_ignoreMissingArgs; + std::vector<std::string> m_argv; + + private: + + /// Fill the vector m_allowedTypes + void initTypes(); + + QualList m_quals; + std::string m_id; + std::vector<std::string> m_allowedTypes; + }; + + + inline void CmdLineArgs2::init() + { + this->clear(); + m_argsfine = false; + m_ignoreMissingArgs = false; + } + + +} // end pool namespace + +#endif // UTILITIES_COLLECTION_CMDLINEARGS2 + diff --git a/Database/APR/CollectionUtilities/CollectionUtilities/CollAppendBase.h b/Database/APR/CollectionUtilities/CollectionUtilities/CollAppendBase.h new file mode 100755 index 0000000000000000000000000000000000000000..9885a94d48edc145448926932eaa919bb776c051 --- /dev/null +++ b/Database/APR/CollectionUtilities/CollectionUtilities/CollAppendBase.h @@ -0,0 +1,147 @@ +/* + Copyright (C) 2002-2017 CERN for the benefit of the ATLAS 
collaboration +*/ + +#ifndef POOL_COLLUTILITIES_COLLAPPENDBASE_H +#define POOL_COLLUTILITIES_COLLAPPENDBASE_H + +/** + * @file CollAppendBase.h + * @brief Base class header for utilities that copy or merge POOL collections + * @author Marcin.Nowak@cern.ch + * $Id: + */ + +#include "PersistentDataModel/Token.h" + +#include "CollectionBase/ICollection.h" +#include "CollectionBase/ICollectionFragment.h" +#include "CollectionBase/CollectionService.h" +#include "CollectionBase/CollectionDescription.h" +#include "CollectionBase/CollectionRowBuffer.h" +#include "CollectionBase/TokenList.h" +#include "CollectionBase/ICollectionColumn.h" +#include "CollectionBase/ICollectionQuery.h" +#include "CollectionBase/ICollectionCursor.h" +#include "CollectionBase/ICollectionDataEditor.h" +#include "CollectionBase/ICollectionMetadata.h" +#include "CollectionBase/CollectionBaseNames.h" + +#include "CoralBase/MessageStream.h" +#include "CoralBase/AttributeList.h" + +#include "CollectionUtilities/DstInfo.h" +#include "CollectionUtilities/Progress.h" +#include "CollectionUtilities/Args2Container.h" +#include "CollectionUtilities/SrcInfo.h" +#include "CollectionUtilities/QueryInfo.h" +#include "CollectionUtilities/CatalogInfo.h" + + +#include <string> +#include <map> +#include <set> + + +namespace pool +{ + class MetaInfo; + + class CollAppendBase + { + public: + + CollAppendBase(std::string name="CollAppend"); + virtual ~CollAppendBase(); + + virtual int execute( std::vector<std::string> argv_v ); + + protected: + virtual bool init( std::vector<std::string> argv_v ); + virtual void chkExistingDst( std::vector<bool>& existVec); + + virtual std::vector< pool::ICollection* > + openDestCollections( pool::CollectionDescription& destDescription ); + + virtual pool::ICollection* + openDestCollection( const std::string& name, const std::string& type, const std::string& connect ); + + virtual pool::ICollection* + createDestCollection( const pool::ICollectionDescription& destDescription ); + + 
virtual pool::CollectionDescription + buildDstDesc(const pool::ICollectionDescription& sourceDesc, + const pool::TokenList &tokens, + const coral::AttributeList &attribs, + const std::string queryopt ); + + virtual pool::ICollection* + openSrcCollection( const std::string& name, const std::string& type, const std::string& connect ); + + virtual std::string + readCollectionGUID( pool::ICollection* collection ); + + virtual void copyData(); + + /// supply user specialization of MetaInfo class for metadata processing - BEFORE init() + virtual void setMetaInfo( MetaInfo* minfo ); + virtual void readMetadata(); + virtual void addMetadata(); + virtual void writeMetadata(); + + virtual void closeCollections(); + virtual void finalize(); + + + std::vector< pool::ICollection* > m_srcCollections; + std::vector< pool::ICollection* > m_destCollections; + + std::string m_thisProgram; + + std::string m_provName; + std::string m_provCLID; + Token m_provToken; + + // used to mark provenance columns removed from the source + bool m_provCollExists; + std::set<std::string> m_removedTokens; + + bool m_noAttrib; + // output modifiers + int m_numEvents; + int m_numEventsPerCommit; + int m_numEventsPerPrint; + int m_numRowsCached; + bool m_extendProv; + bool m_ignoreSchemaDiff; + + time_t m_starttime, m_endtime; + // src collection info + std::vector<int> m_srcCountVec; + unsigned int m_evtCounterTotal; + + // for adding attributes + coral::AttributeList m_addedAttributes; + TokenList m_addedReferences; + + // Classes with shared cli keys and their argument properties + CatalogInfo m_catinfo; + QueryInfo m_queryinfo; + DstInfo m_dstinfo; + SrcInfo m_srcinfo; + MetaInfo* m_metainfo; + Progress m_progress; + + bool m_committed; + pool::ICollectionService* m_collectionService; + mutable coral::MessageStream m_log; + + // Vector of args + Args2Container m_argsVec; + + bool m_initialized; + }; + +} // end pool namespace + +#endif diff --git 
a/Database/APR/CollectionUtilities/CollectionUtilities/CollMetaRegistry.h b/Database/APR/CollectionUtilities/CollectionUtilities/CollMetaRegistry.h new file mode 100644 index 0000000000000000000000000000000000000000..66ed3c8ede20289388b247e4a1754e376e5cc4b3 --- /dev/null +++ b/Database/APR/CollectionUtilities/CollectionUtilities/CollMetaRegistry.h @@ -0,0 +1,37 @@ +/* + Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration +*/ + +#ifndef UTILITIES_COLLECTION_COLLMETAREGISTRY +#define UTILITIES_COLLECTION_COLLMETAREGISTRY + +#include <map> +#include <iostream> +#include "CollectionUtilities/ICollMetaHandler.h" + +namespace pool { + + class CollMetaRegistry : public std::map<ICollMetaHandler::MetaKey,ICollMetaHandler*> + { + public: + typedef std::map<ICollMetaHandler::MetaKey,ICollMetaHandler*> Container; + + static CollMetaRegistry* Instance(); + virtual ~CollMetaRegistry(); + + Container::const_iterator seek(std::string); + + protected: + CollMetaRegistry() {} + private: + static CollMetaRegistry* m_instance; + }; + + inline CollMetaRegistry::~CollMetaRegistry() + { + delete m_instance; + } + +} + +#endif diff --git a/Database/APR/CollectionUtilities/CollectionUtilities/CollSplitByGUIDBase.h b/Database/APR/CollectionUtilities/CollectionUtilities/CollSplitByGUIDBase.h new file mode 100755 index 0000000000000000000000000000000000000000..ead4d6992880fc497e15f13530fea134e3fbb9b6 --- /dev/null +++ b/Database/APR/CollectionUtilities/CollectionUtilities/CollSplitByGUIDBase.h @@ -0,0 +1,106 @@ +/* + Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration +*/ + +#ifndef COLLECTIONuTILITIES_COLLSPLITGUIDBASE_H +#define COLLECTIONuTILITIES_COLLSPLITGUIDBASE_H + + +#include "CollectionBase/ICollection.h" + +#include "CollectionUtilities/CollectionPool.h" +#include "CollectionUtilities/Args2Container.h" +#include "CollectionUtilities/SrcInfo.h" +#include "CollectionUtilities/QueryInfo.h" +#include "CollectionUtilities/CatalogInfo.h" +#include 
"CollectionUtilities/MetaInfo.h" + + +#include <string> +#include <map> + +namespace pool +{ + + class CollSplitByGUIDBase + { + public: + CollSplitByGUIDBase( const std::string& name = "CollSplitByGUID" ); + virtual ~CollSplitByGUIDBase(); + + virtual bool init( std::vector<std::string> argv_v ); + + virtual int execute( std::vector<std::string> argv_v ); + + /// use a different collection pool handler than the default one + virtual void setCollectionPool( CollectionPool* ); + + + + std::string m_thisProgram; + + /// maximum number of allowed output collections + int m_maxSplit; + + /// name of the Token attribute that is used for splitting + std::string m_splitRef; + + // src collection info + std::vector<int> m_srcCountVec; + + // dst collection info + std::vector<bool> m_dstCollExistVec; + + // output modifiers + int m_minEvents; + + //unsigned int numEventsPerCommit = 10000; + int m_rowsCached; + + int m_numEventsPerCommit; + int m_numRowsCached; + + std::vector<std::string> m_inputQuery; + + // Classes handling command line options + CatalogInfo m_catinfo; + QueryInfo m_queryinfo; + SrcInfo m_srcinfo; + MetaInfo m_metainfo; + + pool::ICollectionService* m_collectionService; + mutable coral::MessageStream m_log; + + // Vector of args + Args2Container m_argsVec; + + std::vector< pool::ICollection* > m_srcCollections; + + /// pool of output collections + CollectionPool* m_collectionPool; + + protected: + virtual void openSourceCollections(); + virtual void openDestCollections(); + virtual void copyRows(); + virtual void copyMetadata(); + virtual void finalize(); + + /// read user-prepared list of GUIDs and output collection names + virtual bool readGuidList( const std::string& filename ); + /// generate next output collection name + virtual std::string generateNextCollName( ); + /// get a user-specified output collection for a given GUID (empty string if none given) + virtual std::string collectionNameForGuid( const std::string& guid ); + + + // map GUID->output 
collection name (read from guid list file) + std::map<std::string,std::string> CollNameforGuidMap; + + // sequence counter to generate unique output collection names + int m_outputCollSeqN; + + }; +} + +#endif diff --git a/Database/APR/CollectionUtilities/CollectionUtilities/CollectionMetadataParser.h b/Database/APR/CollectionUtilities/CollectionUtilities/CollectionMetadataParser.h new file mode 100755 index 0000000000000000000000000000000000000000..3ce7f93af0f37376fcfa534e72aee58c413a31c5 --- /dev/null +++ b/Database/APR/CollectionUtilities/CollectionUtilities/CollectionMetadataParser.h @@ -0,0 +1,88 @@ +/* + Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration +*/ + +#ifndef UTILITIES_COLLECTION_COLLECTIONMETADATAPARSER +#define UTILITIES_COLLECTION_COLLECTIONMETADATAPARSER + +#include "CollectionBase/ICollectionMetadata.h" +#include <set> +#include <map> +#include <vector> +#include <string> + + +namespace pool +{ + + typedef std::string ProvNode; + typedef std::string MetaKey; + typedef std::string MetaValue; + +/** + * @class CollectionMetadataParser + * + * @brief Because the provenance is encoded in the keys for the CollectionMetadata + * CollectionMetadataParser was created to make the metadata available in a tree + * which presents the data based on provenance nodes. 
+ * + */ + + class CollectionMetadataParser + { + public: + /// Constructor + CollectionMetadataParser(); + /// Constructor init directly + CollectionMetadataParser(pool::ICollectionMetadata& cmd); + + /// Make constructor simple + bool init(pool::ICollectionMetadata& cmd); + + // Track nodes in the tree + + /// Was any data created along with the collection being queried + bool hasSelfData() const; + /// list of all provenance nodes + std::set<ProvNode> listNodes() const; + /// list of all provenance nodes of a certain level (increasing number is further back in prov) + std::set<ProvNode> listNodes(int level) const; + /// get provenance below + std::vector<ProvNode> getChildren(const ProvNode&) const; + /// get provenance above + std::vector<ProvNode> getParents (const ProvNode&) const; + std::map<ProvNode,std::vector<ProvNode> > getTree() const; + + // Retrieve data for Node + + /// Unique metadata + std::set< std::pair<MetaKey,MetaValue> > getUniqueMetaForNode(const ProvNode&) const; + /// All metadata + std::vector< std::pair<MetaKey,MetaValue> > getMetaForNode(const ProvNode&) const; + /// Retrieve data for Key + std::vector< std::pair<ProvNode,MetaValue> > getMetaForKey(const MetaKey& key) const; + + //unsigned int numNodes() const {return m_nodes.size();} + unsigned int numLinks() const {return m_nodeconns.size();} + unsigned int numLevels() const {return m_numLevels;} + unsigned int size() const {return m_keyvalues.size();} + + private: + + /// Run through metadata and build provenance tree + void buildTree(); + + int m_numLevels; + + std::multimap<int,ProvNode> m_levelList; + std::map<ProvNode,std::vector<ProvNode> > m_tree; + //std::set<ProvNode> m_nodes; + std::set< std::pair<ProvNode,ProvNode> > m_nodeconns; + std::multimap<ProvNode,std::pair<MetaKey,MetaValue> > m_nodevalues; + std::multimap<MetaKey,std::pair<ProvNode,MetaValue> > m_keyvalues; + }; + +} // end pool namespace + +#endif // UTILITIES_COLLECTION_COLLECTIONMETADATAPARSER + diff --git 
a/Database/APR/CollectionUtilities/CollectionUtilities/CollectionPool.h b/Database/APR/CollectionUtilities/CollectionUtilities/CollectionPool.h new file mode 100755 index 0000000000000000000000000000000000000000..60cd33708c03c1f9221730f69b879a57ad3a650a --- /dev/null +++ b/Database/APR/CollectionUtilities/CollectionUtilities/CollectionPool.h @@ -0,0 +1,93 @@ +/* + Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration +*/ + +#ifndef COLLECTIONUTILITIES_COLLECTIONPOOL_H +#define COLLECTIONUTILITIES_COLLECTIONPOOL_H + +#include "CollectionBase/ICollection.h" +#include "CollectionBase/CollectionRowBuffer.h" +#include "CollectionBase/TokenList.h" + +#include <string> +#include <queue> + + +namespace pool +{ + +/* This is a utility class to prevent running out of memory when too many + output collections are created. + It keeps a "window" of open collections, closing "old" ones when a new + one needs to be opened and reopening them on demand. + Rows are cached up to a certain limit if the collection is not opened. 
+ + @author Marcin.Nowak@cern.ch +*/ + + + class CollectionPool + { + typedef std::vector< pool::CollectionRowBuffer > rowVect_t; + + + public: + + ///iterator that flushes and closes all collections + class flushing_iterator { + friend class CollectionPool; + public: + flushing_iterator( CollectionPool* cp ) + : m_cpool( cp ), m_iter( cp->m_rowCache.begin() ), m_end( cp->m_rowCache.end() ), + m_forceOpen( false ) + {} + + void forceOpen( bool fo=true ) { m_forceOpen = fo; } + + /// NOTE - the current collection can NOT be deleted before calling ++ + flushing_iterator& operator ++ (); + bool isValid() { return m_iter != m_end; } + ICollection& operator * () { return *operator->(); } + ICollection* operator -> (); + + protected: + CollectionPool* m_cpool; + std::map< ICollection*, rowVect_t >::iterator m_iter, m_end; + bool m_forceOpen; + }; + + + + CollectionPool( unsigned maxOpen=50, unsigned cacheSize=100 ) ; + virtual ~CollectionPool(); + + /// add GUID -> collection mapping + virtual void addCollection( const std::string& guid, ICollection* coll ); + /// get back which collection corresponds to a GUID + virtual ICollection* get( const std::string& guid ); + /// insert row to a connection designated for a given GUID + virtual void insertRow( const std::string& guid , const pool::CollectionRowBuffer& row ); + + virtual pool::CollectionRowBuffer& getRowBuffer( const std::string& ); + virtual const std::string& getDstRefName(); + + protected: + + void queueOpenColl( pool::ICollection* coll ); + void reduceQueue(); + void writeCache( pool::ICollection* coll ); + + + unsigned m_maxOpen; + unsigned m_rowCacheSize; + std::queue< pool::ICollection* > m_queue; + std::map< std::string, pool::ICollection* > m_map; + std::map< pool::ICollection*, rowVect_t > m_rowCache; + pool::CollectionRowBuffer m_rowBuffer; + std::string m_dstRefName; + }; + +} + +#endif + diff --git a/Database/APR/CollectionUtilities/CollectionUtilities/DstInfo.h 
b/Database/APR/CollectionUtilities/CollectionUtilities/DstInfo.h new file mode 100644 index 0000000000000000000000000000000000000000..de4dbe7de569d22e09dd7b8b636f8029608442ce --- /dev/null +++ b/Database/APR/CollectionUtilities/CollectionUtilities/DstInfo.h @@ -0,0 +1,170 @@ +/* + Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration +*/ + +#ifndef UTILITIES_COLLECTION_DSTINFO +#define UTILITIES_COLLECTION_DSTINFO + +#include "CollectionUtilities/ArgQual.h" +#include "CollectionUtilities/CmdLineArgs2.h" + +/********************************************************** + +DstInfo is an concretization of a CmdLineArgs2 + to contain output collection info. + +Qualifiers: -dst, -dstconnect + +**********************************************************/ + +namespace pool +{ + + class DstInfo : public CmdLineArgs2 + { + public: + + /// Constructors + DstInfo( ); + + virtual ~DstInfo() {} + + /// + bool evalArgs(std::vector<std::string>& argv); + + std::string connect() {return m_connection;} + + std::pair<std::string,std::string> nameAndType(unsigned int); + + std::string name(unsigned int); + std::string type(unsigned int); + + unsigned int nDst(); + + private: + std::string m_connection; + std::vector<std::string> m_NameVec; + std::vector<std::string> m_TypeVec; + + }; + + DstInfo::DstInfo( ) : CmdLineArgs2("DstInfo") + { + QualList markers; + markers.insert( std::make_pair("-dst", ArgQual(2,true,true)) ); + markers.insert( std::make_pair("-dstconnect",ArgQual()) ); + markers["-dst"].desc << "<output collection name> <output collection type> " << std::endl + << "List of name-type pairs is space separated; " + << "Choices for collection types are " << std::endl; + for (std::vector<std::string>::iterator at = this->allowedTypes().begin(); + at != this->allowedTypes().end(); + ++at) + { + markers["-dst"].desc << *at << std::endl; + } + markers["-dstconnect"].desc << "[Output database connection string] " << std::endl + << "(Note: The default value is \"\" 
and this argument MUST " + << "be specified for RelationalCollection." << std::endl + << "The connection string will be looked up in $CORAL_DBLOOKUP_PATH/dblookup.xml" << std::endl + << "Database user authentication is done using CORAL_AUTH_PATH/authentication.xml" << std::endl + << " or CORAL_AUTH_USER and CORAL_AUTH_PASSWORD variables" << std::endl; + this->setArgQuals(markers); + m_NameVec.clear(); + m_TypeVec.clear(); + } + + inline bool + DstInfo::evalArgs(std::vector<std::string>& argv) + { + bool retc = true; + retc = this->CmdLineArgs2::evalArgs(argv); + if (!retc) return retc; + + if ( this->hasQual("-dstconnect") ) { + int ifirst = (*this)["-dstconnect"].first; + m_connection = argv[ifirst]; + } + + if ( this->hasQual("-dst") ) { + int ifirst = (*this)["-dst"].first; + int ilast = (*this)["-dst"].second; + std::string dstCollName = ""; + std::string dstCollType = ""; + int i = ifirst; + while( i < ilast ) { + dstCollName = std::string(argv[i]); ++i; + dstCollType = std::string(argv[i]); + // First check if it is a RelationalCollection + // Or is it a file based or logical type collection + // Or is it gibberish, and they need to try again. + if ( std::find(this->allowedTypes().begin(), + this->allowedTypes().end(), + dstCollType) == this->allowedTypes().end() ) { + std::cerr << " ERROR Collection type " << dstCollType + << " is not presently handled by this program." 
+ << std::endl; + retc = false; + } + m_NameVec.push_back( dstCollName ); + m_TypeVec.push_back( dstCollType ); + ++i; + } // end of m_nameAndTypeVec loop + } // end of -dst Qual + + // Check that connection is available for any RDB collections + if (std::find(m_TypeVec.begin(),m_TypeVec.end(),"RelationalCollection") != m_TypeVec.end() ) + { + if (m_connection.size()==0) { + std::cerr << " ERROR Found at least one RDB output collection, " + << "but -dstconnect is not set" << std::endl; + retc = false; + m_argsfine = false; + } + } + + return retc; + + } + + inline std::pair<std::string,std::string> + DstInfo::nameAndType(unsigned int ind) + { + if ( ind < m_TypeVec.size() ) + return std::pair<std::string,std::string>(m_NameVec[ind],m_TypeVec[ind]); + else { + std::cerr << "Out of range request for dst (name,type) pair" << std::endl; + return std::pair<std::string,std::string>("BAD","BAD"); + } + } + + inline std::string + DstInfo::name(unsigned int ind) + { + if ( ind < m_NameVec.size() ) return m_NameVec[ind]; + else { + std::cerr << "Out of range request for dst name" << std::endl; + return std::string("BAD"); + } + } + + inline std::string + DstInfo::type(unsigned int ind) + { + if ( ind < m_TypeVec.size() ) return m_TypeVec[ind]; + else { + std::cerr << "Out of range request for dst type" << std::endl; + return std::string("BAD"); + } + } + + inline unsigned int + DstInfo::nDst() + { + //return this->nArgs("-dst")/this->getArgQuals()["-dst"].argsize; + return this->nArgs("-dst"); + } + +} // end pool namespace + +#endif // UTILITIES_COLLECTION_DSTINFO + diff --git a/Database/APR/CollectionUtilities/CollectionUtilities/GenericMetaHandler.h b/Database/APR/CollectionUtilities/CollectionUtilities/GenericMetaHandler.h new file mode 100644 index 0000000000000000000000000000000000000000..9b68373c2a84aa211dee28ed024162a4390ffe9c --- /dev/null +++ b/Database/APR/CollectionUtilities/CollectionUtilities/GenericMetaHandler.h @@ -0,0 +1,36 @@ +/* + Copyright (C) 2002-2017 
CERN for the benefit of the ATLAS collaboration +*/ + +#ifndef UTILITIES_COLLECTION_LUMIMETAHANDLER +#define UTILITIES_COLLECTION_LUMIMETAHANDLER + +#include "CollectionUtilities/ICollMetaHandler.h" + +namespace pool { + + class GenericMetaHandler : public pool::ICollMetaHandler + { + public: + + GenericMetaHandler(std::string name="Generic") : m_name(name) {} + virtual ~GenericMetaHandler() {} + + std::string name() {return m_name;} + bool exactMatch() {return true;} + + void process(const CopyMode& mode, + MetaValue& value, + pool::ICollMetaHandler::CollectionMetadata& out, + const ProvNode& prefix, + MetaKey& key); + void Merge (MetaKey& key, MetaValue& value, MetaValue& out, const ProvNode& srcName); + void Extract(MetaKey& key, MetaValue& value, MetaValue& out, const ProvNode& srcName); + void Copy (MetaValue& value, MetaValue& out); + + private: + std::string m_name; + }; + +} +#endif diff --git a/Database/APR/CollectionUtilities/CollectionUtilities/ICollMetaHandler.h b/Database/APR/CollectionUtilities/CollectionUtilities/ICollMetaHandler.h new file mode 100644 index 0000000000000000000000000000000000000000..d85bdd44bae888c467f3b198bd622552e03d58a8 --- /dev/null +++ b/Database/APR/CollectionUtilities/CollectionUtilities/ICollMetaHandler.h @@ -0,0 +1,42 @@ +/* + Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration +*/ + +#ifndef UTILITIES_COLLECTION_ICOLLMETAHANDLER +#define UTILITIES_COLLECTION_ICOLLMETAHANDLER + +#include <string> +#include <map> + +// Would like to use DBDataModel/CollectionMetadata for retrieve, +// but the DataVector brings in unwanted dependencies + +namespace pool { + + class ICollMetaHandler + { + public: + // All sorts of types + typedef std::string ProvNode; + typedef std::string MetaKey; // simple string until more needed + typedef std::string MetaValue; // simple string until more needed + typedef std::map<MetaKey,MetaValue> CollectionMetadata; // would be nice if this were avail from CBase + enum CopyMode { 
copy, extract, merge }; + + ICollMetaHandler() {} + virtual ~ICollMetaHandler() {} + + virtual std::string name() = 0; + + // This should be what is called by the clients, the real interface + virtual void process(const CopyMode& mode, + MetaValue& value, + CollectionMetadata& out, + const ProvNode& prefix, + MetaKey& key) = 0; + + }; + +} + +#endif diff --git a/Database/APR/CollectionUtilities/CollectionUtilities/MaxEventsInfo.h b/Database/APR/CollectionUtilities/CollectionUtilities/MaxEventsInfo.h new file mode 100644 index 0000000000000000000000000000000000000000..ffcdf48665f4642379fdd1cb6e3ef440f90f1449 --- /dev/null +++ b/Database/APR/CollectionUtilities/CollectionUtilities/MaxEventsInfo.h @@ -0,0 +1,48 @@ +/* + Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration +*/ + +#ifndef UTILITIES_COLLECTION_NEVENTSINFO +#define UTILITIES_COLLECTION_NEVENTSINFO + +#include "CollectionUtilities/ArgQual.h" +#include "CollectionUtilities/CmdLineArgs2.h" + +/********************************************************** + +MaxEventsInfo is an concretization of a CmdLineArgs2 + to contain the max number of events to process + +Qualifiers: -nevents + +**********************************************************/ + +namespace pool +{ + + class MaxEventsInfo : public CmdLineArgs2 + { + public: + + /// Constructors + MaxEventsInfo( ); + + virtual ~MaxEventsInfo() {} + + /// + bool evalArgs(std::vector<std::string>& argv); + + int get() { return m_maxEvents; } + bool specified() { return m_specified; } + + protected: + bool m_specified; + int m_maxEvents; + }; + + + +} // end pool namespace + +#endif + diff --git a/Database/APR/CollectionUtilities/CollectionUtilities/MetaInfo.h b/Database/APR/CollectionUtilities/CollectionUtilities/MetaInfo.h new file mode 100644 index 0000000000000000000000000000000000000000..bc15c1f09530a997f240731407e4277a0d52905a --- /dev/null +++ b/Database/APR/CollectionUtilities/CollectionUtilities/MetaInfo.h @@ -0,0 +1,126 @@ +/* + Copyright 
   /// Command-line argument group for metadata-related options.  It also
   /// declares the entry points used to check, read and write collection
   /// metadata; their implementations are not in this header.
   class MetaInfo : public CmdLineArgs2
   {
   public:
     /// Constructors
     /// @param logname  defaults to "MetaInfo"; presumably the name given
     ///                 to m_log -- confirm in the .cpp
     MetaInfo( const std::string& logname = "MetaInfo" );

     virtual ~MetaInfo();

     /// process options
     virtual bool evalArgs(std::vector<std::string>& argv);

     /// if -nometadata option was given
     virtual bool noMetadata() const;

     /// add metadata entry [key, value]
     /// @param srcCollection  provenance node, "SELF" by default
     void addMetaEntry(ICollMetaHandler::MetaKey key,
                       ICollMetaHandler::MetaValue val,
                       ICollMetaHandler::ProvNode srcCollection="SELF");

     /// how many metadata entries do we have (size of m_metadata)
     virtual size_t nMeta() const;

     /// the copy mode requested
     ICollMetaHandler::CopyMode copyMode() const;

     /// check for conflicts between input and output collections before copying metadata
     virtual bool checkMetadata( std::vector<pool::ICollection*> inputs,
                                 std::vector<pool::ICollection*> outputs );

     /// read metadata from source collections
     virtual void readMetadata( std::vector<pool::ICollection*> inputs );

     /// write metadata to destination collections
     virtual void writeMetadata(std::vector<pool::ICollection*> outputs);

     /// Read-only iteration over the accumulated entries in m_metadata
     typedef std::map< std::string, std::string >::const_iterator MetaIterator;
     virtual MetaIterator begin() const;
     virtual MetaIterator end() const;

   protected:
     /// checks if this metadata item should be copied
     bool shouldCopy( std::string key );

     /// do not copy source metadata
     bool m_noMetadata;
     // Copymode: affects how metadata is being copied
     ICollMetaHandler::CopyMode m_copyMode;

     /// List of metadata to keep
     std::vector<std::pair<std::string,std::string> > m_wildkeepers;

     /// The accumulated metadata entries. No key duplication possible
     ICollMetaHandler::CollectionMetadata m_metadata;

     /// Commandline metadata
     ICollMetaHandler::CollectionMetadata m_cmdlineMetadata;

     // NOTE(review): raw pointer; who creates and deletes it is not visible
     // in this header -- confirm in the .cpp before copying MetaInfo objects
     ICollMetaHandler* m_genericHandler;

     /// options list
     QualList m_markers;

     // mutable so that const member functions can still log
     mutable coral::MessageStream m_log;
   };



   /// Returns the m_noMetadata flag.
   inline bool MetaInfo::noMetadata() const
   {
      return m_noMetadata;
   }

   /// First accumulated metadata entry.
   inline MetaInfo::MetaIterator MetaInfo::begin() const
   {
      return m_metadata.begin();
   }

   /// One past the last accumulated metadata entry.
   inline MetaInfo::MetaIterator MetaInfo::end() const
   {
      return m_metadata.end();
   }


   /// Number of accumulated metadata entries.
   inline size_t MetaInfo::nMeta() const
   {
      return m_metadata.size();
   }


   /// Returns the stored copy mode.
   inline ICollMetaHandler::CopyMode MetaInfo::copyMode() const
   {
      return m_copyMode;
   }
   /// Command-line argument group holding query information
   /// (qualifiers -query, -queryopt).
   class QueryInfo : public CmdLineArgs2
   {
   public:

      /// Constructors
      QueryInfo( );

      virtual ~QueryInfo() {}

      /// Parse the arguments; defined in the .cpp file.
      bool evalArgs(std::vector<std::string>& argv);

      /// Second parsing pass that is also handed the enclosing container;
      /// defined in the .cpp file.
      bool evalArgsPass2(std::vector<std::string>& argv, pool::Args2Container& container);

      /// return the query string.
      /// @param qn  index of the query, 0 by default
      const std::string& query( size_t qn=0 );
      /// Number of stored queries.
      size_t nQuery() const { return m_queries.size(); }
      /// All stored queries.
      const std::vector<std::string>& queries() const { return m_queries; }

      /// The stored query options string.
      const std::string& queryOptions() const { return m_queryOptions; }

   private:
      std::vector<std::string> m_queries;
      std::string m_queryOptions;

      // NOTE(review): presumably used to rewrite trigger conditions in the
      // query text -- confirm in QueryInfo.cpp
      TrigCollQuery queryRemapper;

   };
+ +Qualifiers: -src, -srcconnect + +**********************************************************/ + +namespace pool +{ + + class SrcInfo : public CmdLineArgs2 + { + public: + + /// Constructors + SrcInfo( ); + + virtual ~SrcInfo() {} + + /// + bool evalArgs(std::vector<std::string>& argv); + + std::string connect() {return m_connection;} + + std::pair<std::string,std::string> nameAndType(unsigned int); + + std::string name(unsigned int); + std::string type(unsigned int); + + unsigned int nSrc(); + + private: + std::string m_connection; + std::vector<std::string> m_NameVec; + std::vector<std::string> m_TypeVec; + + }; + + + + inline + SrcInfo::SrcInfo( ) : CmdLineArgs2("SrcInfo") + { + QualList markers; + markers.insert( std::make_pair("-src", ArgQual(2,true,true)) ); + markers.insert( std::make_pair("-srcconnect",ArgQual()) ); + markers["-src"].desc << "<input collection name> <input collection type> " << std::endl + << "List of name-type pairs is space separated; " + << "Choices for collection types are " << std::endl; + for (std::vector<std::string>::iterator at = this->allowedTypes().begin(); + at != this->allowedTypes().end(); + ++at) + markers["-src"].desc << *at << std::endl; + markers["-srcconnect"].desc << "[Input database connection string] " << std::endl + << "(Note: The default value is \"\" and this argument MUST " + << "be specified for RelationalCollection." 
<< std::endl + << "The connection string will be looked up in $CORAL_DBLOOKUP_PATH/dblookup.xml" << std::endl + << "Database user authentication is done using CORAL_AUTH_PATH/authentication.xml" << std::endl + << " or CORAL_AUTH_USER and CORAL_AUTH_PASSWORD variables" << std::endl; + this->setArgQuals(markers); + m_NameVec.clear(); + m_TypeVec.clear(); + } + + inline bool SrcInfo::evalArgs(std::vector<std::string>& argv) + { + // Call underlying parser in the base class + bool retc = this->CmdLineArgs2::evalArgs(argv); + if(!retc) return retc; + + if ( this->hasQual("-srcconnect") ) { + int ifirst = (*this)["-srcconnect"].first; + m_connection = argv[ifirst]; + } + + if ( this->hasQual("-src") ) { + int ifirst = (*this)["-src"].first; + int ilast = (*this)["-src"].second; + std::string srcCollName = ""; + std::string srcCollType = ""; + int i = ifirst; + while( i < ilast ) { + srcCollName = std::string(argv[i]); ++i; + srcCollType = std::string(argv[i]); + // First check if it is a RelationalCollection + // Or is it a file based or logical type collection + // Or is it gibberish, and they need to try again. + if ( std::find(this->allowedTypes().begin(), + this->allowedTypes().end(), + srcCollType) == this->allowedTypes().end() ) { + std::cerr << " ERROR Collection type " << srcCollType + << " is not presently handled by this program." 
+ << std::endl; + retc = false; + } + m_NameVec.push_back( srcCollName ); + m_TypeVec.push_back( srcCollType ); + ++i; + } // end of m_nameAndTypeVec loop + } // end of -src Qual + + // Check that connection is available for any RDB collections + if(std::find(m_TypeVec.begin(),m_TypeVec.end(),"RelationalCollection") != m_TypeVec.end() ) + { + if(m_connection.size()==0) { + std::cerr << " ERROR Found at least one RDB source collection, " + << "but -srcconnect is not set" << std::endl; + retc = false; + m_argsfine = false; + } + } + + return retc; + + } + + inline std::pair<std::string,std::string> + SrcInfo::nameAndType(unsigned int ind) + { + if ( ind < m_TypeVec.size() ) + return std::pair<std::string,std::string>(m_NameVec[ind],m_TypeVec[ind]); + else { + std::cerr << "Out of range request for src (name,type) pair" << std::endl; + return std::pair<std::string,std::string>("BAD","BAD"); + } + } + + inline std::string + SrcInfo::name(unsigned int ind) + { + if ( ind < m_NameVec.size() ) return m_NameVec[ind]; + else { + std::cerr << "Out of range request for src name" << std::endl; + return std::string("BAD"); + } + } + + inline std::string + SrcInfo::type(unsigned int ind) + { + if ( ind < m_TypeVec.size() ) return m_TypeVec[ind]; + else { + std::cerr << "Out of range request for src type" << std::endl; + return std::string("BAD"); + } + } + + inline unsigned int + SrcInfo::nSrc() + { + return this->nArgs("-src"); + } + +} // end pool namespace + +#endif // UTILITIES_COLLECTION_SRCINFO + + + + + + + diff --git a/Database/APR/CollectionUtilities/CollectionUtilities/TagMetadataKey.h b/Database/APR/CollectionUtilities/CollectionUtilities/TagMetadataKey.h new file mode 100644 index 0000000000000000000000000000000000000000..70eb634076d95926a89e8d6934d51b03bc781bd8 --- /dev/null +++ b/Database/APR/CollectionUtilities/CollectionUtilities/TagMetadataKey.h @@ -0,0 +1,187 @@ +/* + Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration +*/ + +#ifndef 
#ifndef TAGMETADATAKEY_H
#define TAGMETADATAKEY_H

///////////////////////////////////////////////////////////////////////
//
// @class TagMetadataKey
// @author Jack.Cranshaw@cern.ch
//
// Class to translate a key+qualifier into a string
// to use as a single key with CollectionMetadata.
// Followup tools can then 'key' off of the key or key+qualifier.
//
// Encoded form, as produced by toString():
//    Key@<key>;[Qual@<qual>;][IOV@<iov>;][Ver@<ver>;]
//
///////////////////////////////////////////////////////////////////////

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <sstream>
#include <string>

namespace pool {

class TagMetadataKey
{
public:
   //friend class ICollectionMetadataHelper;
   /// Terminator written after every field.
   static std::string sep()      {return ";";}
   /// Field markers; each ends in '@'.
   static std::string keymark()  {return "Key@";}
   static std::string qualmark() {return "Qual@";}
   static std::string iovmark()  {return "IOV@";}
   static std::string vermark()  {return "Ver@";}

   TagMetadataKey();
   /// @param versioned  when true the version field starts at "0" and is
   ///                   written by toString()
   TagMetadataKey(std::string key, std::string qual="", bool versioned=false);

   void setEncoded(bool flag);
   void setKey(std::string key);
   void setQual(std::string qual);
   void setIOV(std::string iov);
   void setVersion(std::string ver);

   bool isVersioned() const {return m_verstat;}

   bool        encoded() const;
   std::string key() const;
   std::string qual() const;
   std::string iov() const;
   std::string ver() const;

   /// Fill this object from an encoded string.
   /// @return true only when a Key@ field was found and decoded.
   bool fromString(std::string comp);
   /// Encode this object; an unencoded key is returned verbatim.
   std::string toString() const;

private:
   /// Legacy accessor: payload of marker @p key in @p comp, or "BAD".
   std::string getpayload(const std::string key, const std::string comp);

   /// Extract the text between @p mark and the next sep().
   /// @return false when the marker or its terminating sep() is missing.
   static bool findPayload(const std::string& mark,
                           const std::string& comp,
                           std::string& payload);

   std::string m_key;
   std::string m_qual;
   std::string m_iov;
   std::string m_ver;
   bool m_verstat;  // verstat is true if the key is ALLOWED to be versioned
   bool m_encoded;
};

inline
TagMetadataKey::TagMetadataKey() : m_verstat(false), m_encoded(true)
{}

inline
TagMetadataKey::TagMetadataKey(std::string key, std::string qual, bool versioned) :
      m_key(key), m_qual(qual), m_verstat(versioned), m_encoded(true)
{if (m_verstat) m_ver="0";}

inline void
TagMetadataKey::setEncoded(bool flag) {m_encoded=flag;}

inline void
TagMetadataKey::setKey(std::string key) {m_key=key;}

inline void
TagMetadataKey::setQual(std::string qual) {m_qual=qual;}

inline void
TagMetadataKey::setIOV(std::string iov) {m_iov=iov;}

inline void
TagMetadataKey::setVersion(std::string ver) {m_ver = ver;}

inline bool
TagMetadataKey::encoded() const {return m_encoded;}

inline std::string
TagMetadataKey::key() const {return m_key;}

inline std::string
TagMetadataKey::qual() const {return m_qual;}

inline std::string
TagMetadataKey::iov() const {return m_iov;}

inline std::string
TagMetadataKey::ver() const {return m_ver;}

inline bool
TagMetadataKey::fromString(std::string comp)
{
   bool ready = false;
   const char sp = sep()[0];
   const std::ptrdiff_t nseps  = std::count(comp.begin(),comp.end(),sp);
   // All marks should be denoted with an @
   const std::ptrdiff_t nmarks = std::count(comp.begin(),comp.end(),'@');
   if (nmarks > 0 && nseps == nmarks) {
      // first check for key. if no key then ready is false
      std::string payload;
      if (findPayload(keymark(),comp,payload)) {
         m_encoded = true;
         m_key = payload;
         // The remaining fields are optional: absent means empty.
         if (findPayload(qualmark(),comp,payload)) m_qual = payload;
         else m_qual.clear();
         if (findPayload(iovmark(),comp,payload)) m_iov = payload;
         else m_iov.clear();
         if (findPayload(vermark(),comp,payload)) m_ver = payload;
         else m_ver.clear();
         ready = true;
      }
   }
   else {
      m_key = comp;
      m_encoded = false;
      std::cerr << "WARNING: No markers found in "
                << comp << ". Treating as unencoded single key." << std::endl;
   }
   return ready;
}

inline std::string
TagMetadataKey::toString() const
{
   if (!m_encoded) return m_key;

   std::string out = keymark() + m_key + sep();
   if (!m_qual.empty()) out += qualmark() + m_qual + sep();
   if (!m_iov.empty())  out += iovmark()  + m_iov  + sep();
   if (m_verstat && !m_ver.empty()) out += vermark() + m_ver + sep();
   return out;
}

inline bool
TagMetadataKey::findPayload(const std::string& mark,
                            const std::string& comp,
                            std::string& payload)
{
   const std::string::size_type start = comp.find(mark);
   if (start == std::string::npos) return false;

   // The payload begins right after THIS marker.  Markers differ in
   // length ("Key@" vs "Qual@"), so the offset must come from the marker
   // that was actually searched for.
   const std::string::size_type paystart = start + mark.size();
   const std::string::size_type payend   = comp.find(sep(),paystart);
   if (payend == std::string::npos) {
      std::cerr << "WARNING: missing sep() for key" << std::endl;
      return false;
   }
   payload = comp.substr(paystart,payend-paystart);
   return true;
}

inline std::string
TagMetadataKey::getpayload(const std::string key, const std::string comp)
{
   std::string payload;
   if (findPayload(key,comp,payload)) return payload;
   return std::string("BAD");
}

} // namespace pool
#endif
/// Collect the mapped values of @p m into a vector, in the map's key order.
///
/// The map is taken by const reference so that calling this helper no
/// longer copies every key and payload of the caller's map.
///
/// @param m  map whose payloads are wanted
/// @return   one element per map entry, ordered by key; empty for an empty map
template <class KEY, class PAYLOAD>
std::vector<PAYLOAD> vectorizeMapPayload(const std::map<KEY,PAYLOAD>& m)
{
   std::vector<PAYLOAD> outputs;
   outputs.reserve(m.size());   // final size is known up front
   for (typename std::map<KEY,PAYLOAD>::const_iterator j = m.begin(); j != m.end(); ++j) {
      outputs.push_back(j->second);
   }
   return outputs;
}
CollectionUtilities -no_prototypes \ + "../src/ArgQual.cpp ../src/CmdLineArgs2.cpp ../src/Args2Container.cpp \ + ../src/CollectionMetadataParser.cpp ../src/CatalogInfo.cpp \ + ../src/CollectionPool.cpp ../src/CollSplitByGUIDBase.cpp \ + ../src/Progress.cpp ../src/CollAppendBase.cpp ../src/UtilityFuncs.cpp \ + ../src/MaxEventsInfo.cpp ../src/QueryInfo.cpp ../src/MetaInfo.cpp" \ + ../src/GenericMetaHandler.cpp ../src/CollMetaRegistry.cpp + +apply_pattern installed_library + +#========== UTILITIES +# avoid the .exe extension for applications +pattern application_alias \ + alias <application> "<application>" + +pattern collection_utility \ + application <name> ../utilities/<name>.cpp application_suffix="" ; \ + private ; \ + macro <name>_dependencies "CollectionUtilities" ; \ + end_private +# made private to avoid clash for Database/AthenaPOOL/POOLCollectionTools +# having applications with the same names + +apply_pattern collection_utility name=coll_insertGuidToCatalog +apply_pattern collection_utility name=CollAppend +apply_pattern collection_utility name=CollListAttrib +apply_pattern collection_utility name=CollListFileGUID +apply_pattern collection_utility name=CollListMetadata +apply_pattern collection_utility name=CollListPFN +apply_pattern collection_utility name=CollListToken +apply_pattern collection_utility name=CollQuery +apply_pattern collection_utility name=CollRemove +apply_pattern collection_utility name=CollSplitByGUID +apply_pattern collection_utility name=TokenExtractor + +apply_pattern declare_scripts files="../scripts/*.exe ../scripts/*.py" + +private +use AtlasXercesC AtlasXercesC-* External +use AtlasBoost AtlasBoost-* External +use POOLCore POOLCore-* Database/APR +use PersistencySvc PersistencySvc-* Database/APR +use FileCatalog FileCatalog-* Database/APR +use yampl yampl-* External + +macro_append CollectionUtilities_shlibflags " -lcurl" +macro_append CollectionUtilities_linkopts " -lcurl" \ No newline at end of file diff --git 
#!/bin/env python

# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
#
# @author Marcin Nowak
# @date 08.2012
# @brief example of how to extract data from Collections in Python
#
# this example/utility prints EventNumber and all file GUIDs for this Event
# supports queries (without trigger bits decoding though)
#
# usage: collListGuids.py -src <collection[.root]> [-query <condition>]
#

import PyCintex
PyCintex.Cintex.Enable()
import ROOT
ROOT.gROOT.SetBatch(True)
import re,sys

def toiter (beg, end):
    """Yield the iterator `beg` itself at each position up to `end`."""
    while beg != end:
        #yield beg.__deref__()
        yield beg
        beg.__preinc__()
    return

def toiter1 (c):
    """Iterate over a container exposing C++ style begin()/end()."""
    return toiter (c.begin(), c.end())


USAGE = "Currently implemented options: -src -query"

# --- command line -----------------------------------------------------
# A while loop is used so that the value following an option is really
# skipped; rebinding the loop variable of a for/range loop has no effect.
collection_name = None
query_string = ''
args = sys.argv[1:]
pos = 0
while pos < len(args):
    opt = args[pos]
    if opt == '-h':
        print(USAGE)
        sys.exit(0)
    if opt in ('-src', '-query'):
        if pos + 1 >= len(args):
            sys.stderr.write("Option %s requires a value\n" % opt)
            sys.exit(1)
        value = args[pos + 1]
        if opt == '-src':
            # the collection is opened by name, without the file extension
            collection_name = re.sub(r'\.root$', '', value)
        else:
            query_string = value
        pos += 1
    pos += 1

if collection_name is None:
    sys.stderr.write("No input collection given (-src)\n")
    print(USAGE)
    sys.exit(1)


colsvc = ROOT.pool.CollectionService()
read_only_flag = True
col=colsvc.open(collection_name, 'RootCollection', '', read_only_flag)

des = col.description()
attributes = [ des.attributeColumn(n, 0).name()
               for n in range(0, des.numberOfAttributeColumns()) ]
if 'RunNumber' not in attributes or 'EventNumber' not in attributes:
    print("RunNumber/EventNumber attribute not found in the collection")
    sys.exit(1)

#for c in range(0, des.numberOfAttributeColumns()):
#    print "Attribute: ", des.attributeColumn(c, 0).name(), " type:", des.attributeColumn(c, 0).type()
#for c in range(0, des.numberOfTokenColumns()):
#    print "Reference: ", des.tokenColumn(c, 0).name()

query = col.newQuery()
query.selectAll()
query.setCondition( query_string )
#query.addToOutputList( des.attributeColumn(0, 0).name() )
#query.addToOutputList( des.attributeColumn(1, 0).name() )

cursor = query.execute()

if( cursor.next() ):
    row = cursor.currentRow()

    tlist = row.tokenList()
    alist = row.attributeList()
    print("Attributes # =  %s  References # =  %s" % (alist.size(), tlist.size()))

    queryopt = ['RunNumber', 'EventNumber']

    # name of the typed accessor ("data<TYPE>") for each printed attribute
    attrdata_func_name = {}
    line = ''
    for atr in queryopt:
        attrdata_func_name[atr] = 'data<' + alist[atr].specification().typeName() + '>'
        line += "%10.10s " % atr
    for t in toiter1(tlist):
        line += "%38.38s " % t.tokenName()
    print(line)

    has_row = True
    while has_row:
        alist = cursor.currentRow().attributeList()
        tlist = cursor.currentRow().tokenList()
        line = ''
        for atr in queryopt:
            line += "%10.10s " % str(getattr(alist[atr], attrdata_func_name[atr])())
        for i in range(0, tlist.size()):
            line += "%38.38s " % tlist[i].dbID()
        print(line)
        has_row = cursor.next()

col.close()
+ + % someProgram.exe -somequal thisval + +**********************************************************/ + +using namespace pool; + +void ArgQual::print(std::ostream& theStream) const +{ + if (required) theStream << "(REQUIRED) "; + else theStream << "(OPTIONAL) "; + if (multiarg) theStream << "Takes arguments in groups of " << argsize << std::endl; + else theStream << "Takes " << argsize << " arguments" << std::endl; + if (desc.str().size()>0) theStream << "Format: " << desc.str() << std::endl; + else theStream << "NO DESCRIPTION FOR THIS QUALIFIER" << std::endl; +} + +std::ostream& operator<<(std::ostream& theStream, ArgQual& rhs) { + if (rhs.multiarg) theStream << "Takes arguments in groups of " << rhs.argsize << std::endl; + else theStream << "Takes " << rhs.argsize << " arguments" << std::endl; + if (rhs.desc.str().size()>0) theStream << rhs.desc.str() << std::endl; + else theStream << "NO DESCRIPTION FOR THIS QUALIFIER" << std::endl; + return theStream; +} + diff --git a/Database/APR/CollectionUtilities/src/Args2Container.cpp b/Database/APR/CollectionUtilities/src/Args2Container.cpp new file mode 100644 index 0000000000000000000000000000000000000000..52f53cb5366158f086534b417fd7a73f4516bf10 --- /dev/null +++ b/Database/APR/CollectionUtilities/src/Args2Container.cpp @@ -0,0 +1,770 @@ +/* + Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration +*/ + +#include <xercesc/dom/DOM.hpp> +#include <xercesc/dom/DOMImplementation.hpp> +#include <xercesc/dom/DOMImplementationLS.hpp> +#if XERCES_VERSION_MAJOR < 3 +#include <xercesc/dom/DOMWriter.hpp> +#else +#include <xercesc/dom/DOMLSSerializer.hpp> +#endif +#include <xercesc/dom/DOMNode.hpp> +#include <xercesc/parsers/XercesDOMParser.hpp> + +#include <xercesc/sax/SAXException.hpp> + +#include <xercesc/framework/StdOutFormatTarget.hpp> +#include <xercesc/framework/LocalFileFormatTarget.hpp> +#include <xercesc/framework/StdOutFormatTarget.hpp> +#include <xercesc/framework/LocalFileFormatTarget.hpp> + 
+#include <xercesc/util/XMLUni.hpp> +#include <xercesc/util/PlatformUtils.hpp> +#include <xercesc/util/XMLUni.hpp> +#include <xercesc/util/OutOfMemoryException.hpp> +#include <xercesc/util/PlatformUtils.hpp> +#include <xercesc/util/TransService.hpp> + +//#include <xercesc/parsers/XercesDOMParser.hpp> + +#include <ctime> +#include <stdexcept> + +#include "CollectionUtilities/ArgQual.h" +#include "CollectionUtilities/CmdLineArgs2.h" +#include "CollectionUtilities/Args2Container.h" + +#define corENDL coral::MessageStream::endmsg + +XERCES_CPP_NAMESPACE_USE + +using namespace pool; +using namespace std; + +// Stole this little snippet from OOPWeb.com +// http://oopweb.com/CPP/Documents/CPPHOWTO/Volume/C++Programming-HOWTO-7.html +// to fill in a basic failure in std::string +// Essentially it does for C++ what strtok does for C + +void Tokenize(const std::string& str, + std::vector<std::string>& tokens, + const std::string& delimiters = " ") +{ + // Skip delimiters at beginning. + std::string::size_type lastPos = str.find_first_not_of(delimiters, 0); + // Find first "non-delimiter". + std::string::size_type pos = str.find_first_of(delimiters, lastPos); + + while (std::string::npos != pos || std::string::npos != lastPos) + { + // Found a token, add it to the vector. + tokens.push_back(str.substr(lastPos, pos - lastPos)); + // Skip delimiters. Note the "not_of" + lastPos = str.find_first_not_of(delimiters, pos); + // Find next "non-delimiter" + pos = str.find_first_of(delimiters, lastPos); + } +} + +/*************************************************************** + +Args2Container is an extension of a vector of CmdLineArgs2 + +It has a name and a desc. Name is normally the program name. 
+It also adds several utility methods such as + +printHelp(verbose) : + - (verbose = false) print out names of ArgQuals + for all contained pointers + - (verbose = true) print out names _and_ desc of ArgQuals + for all contained pointers + +checkValid() : Loop over pointers and call valid() method + +evalArgs(argv) : save time/errors by calling evalArgs + on the container rather than the individual CmdLineArg2's + +***************************************************************/ +Args2Container::Args2Container( std::string name, bool doxml, coral::MessageStream *l ) + : m_log( l? *l : *new coral::MessageStream("CmdLine") ), + m_name(name), + m_xml(doxml), + m_xmlOpt( FILEONLY ), + m_ver( V2 ), + m_a2c_cla2("local"), + m_xFileName(name) +{ + QualList markers; + markers["-help"] = ArgQual("Print detailed description of all options", 0); + markers["-h"] = markers["-help"]; + markers["-xmlMode"].desc << "How to merge xml and cli arguments when both present. " + << "Options = (FILEONLY, OVERRIDE, APPEND) DEFAULT = OVERRIDE"; + markers["-xmlInput"].desc << "This takes a file name as input. " << std::endl + << "NOTE: The CLI args will be discarded! 
"; + markers["-xmlOut"].desc << "This takes a name for the xml file with the arguments " + << "Default = utility name, e.g util.xml"; + markers["-compatibility"].desc << "If this is set, then it uses the old xml format" << std::endl + << " Options are : "; + for (unsigned int i=0; i<NUM_VERSIONS; ++i) { + markers["-compatibility"].desc << " V" << i+1; + } + m_a2c_cla2.setArgQuals(markers); + this->push_back(&m_a2c_cla2); +} + +void Args2Container::init() +{ + for( Args2Container::iterator it = this->begin(); it != this->end(); ++it ) + (*it)->init(); +} + +//----------------------------------------------------------------------- +// printHelp : loop over contents and print name (and desc) of qualifiers +//----------------------------------------------------------------------- +void Args2Container::printHelp(bool verbose) const +{ + std::cerr << "Usage: " << m_name << std::endl; + if (desc.str().size()>0) + std::cerr << desc.str() << std::endl; + for (Args2Container::const_iterator cmdit = this->begin(); cmdit != this->end(); ++cmdit) + { + for (QualList::const_iterator it = (*cmdit)->getArgQuals().begin(); + it != (*cmdit)->getArgQuals().end(); + ++it) + { + std::cerr << "Option = " << it->first << " "; + if ( verbose ) it->second.print(std::cerr); + std::cerr << std::endl; + } + std::cerr << std::endl; + } +} + +bool Args2Container::checkValid() const +{ + for (Args2Container::const_iterator ait = this->begin(); ait != this->end(); ++ait) { + if ( (*ait)->valid() == false ) { + std::cerr << "Invalid Qualifier" << std::endl; + return false; + } + } + return true; +} + +// +// Evaluate the argv vector for a named argument +// +bool Args2Container::evalArgs(const std::string cliarg, std::vector<std::string>& argv) +{ + // Evaluate the args for the Args2Container local cla2 + if( ! 
m_a2c_cla2.evalArgs(argv) ) { + m_log << coral::Error << "Args2Container has invalid internal argument" << corENDL; + return false; + } + // Print out detailed help if requested + if( m_a2c_cla2.hasQual("-help") || m_a2c_cla2.hasQual("-h") ) { + printHelp(true); + return false; + } + // Print out simple help if not enough options given + if( argv.size() < 2 ) { + printHelp(false); + return false; + } + + // If this is OK then check if xmlInput is one of the options + bool xmlpresent = false; + CmdLineArgs2::iterator clit = m_a2c_cla2.begin(); + while (clit!=m_a2c_cla2.end()) { + if (clit->first == "-compatibility") { + if (argv[clit->second.first]=="V1") m_ver=V1; + else if (argv[clit->second.first]=="V2") m_ver=V2; + else { m_log << coral::Error << "compatibility version " << argv[clit->second.first] + << " not supported" << corENDL; } + } + if (clit->first == "-xmlOut") { + m_xFileName = argv[clit->second.first]; + m_log << coral::Info << "Will use " << m_xFileName << " for output xml filename" + << corENDL; + } + if (clit->first == "-xmlMode") { + std::string mode = argv[clit->second.first]; + if (mode=="FILEONLY") m_xmlOpt = FILEONLY; + else if (mode=="OVERRIDE") m_xmlOpt = OVERRIDE; + else if (mode=="APPEND") m_xmlOpt = APPEND; + else { + m_log << coral::Warning << "Unrecognized mode : " << mode << corENDL; + } + } + ++clit; + } + // Don't rewrite actual argv with xml until all other CLI arguments have been checked + // ---> Use second loop + std::string file; + if( m_a2c_cla2.getOpt("-xmlInput", file) ) { + m_log << coral::Info << "Taking input from xml file: " << file << corENDL; + //argv = this->fillCmdLineArgsFromXML(file); + xmlpresent = true; + } + + if (xmlpresent) { + // if xml parameter are present, then need to decide how to + // merge them with any cli parameters + + // Step 1 : Get evaluation of argv from cli + std::string exeName = argv[0]; + + // If Args2Container is valid, then eval args for all content cla2's + for 
(Args2Container::const_iterator ait = this->begin(); ait != this->end(); ++ait) { + if ( cliarg=="ALL" || (*ait)->name()==cliarg) { + (*ait)->ignoreMissingArgs( true ); + (*ait)->evalArgs(argv); + (*ait)->ignoreMissingArgs( false ); + } + } + //for (int l = 0; l < argv.size(); ++l) std::cout << l << " | " << argv[l] << std::endl; + + // Step 2 : put the markers plus contents for the cli evalArgs into a map + + std::map<std::string,std::vector<std::string> > cliArgFrags; + for (Args2Container::const_iterator cl_it = this->begin(); cl_it != this->end(); ++cl_it) { + for ( CmdLineArgs2::iterator it = (*cl_it)->begin(); it != (*cl_it)->end(); ++it) { + std::string marker = it->first; + std::vector<std::string> list; + //std::cout << "In cliarg loop for " << marker << std::endl; + for (int index = it->second.first; index < it->second.second; ++index) { + list.push_back(argv[index]); + //std::cout << "In cliarg loop : index " << index << " : " << argv[index] << std::endl; + } + cliArgFrags.insert(std::make_pair(marker,list)); + } + } + + // Step 3 : remake argv based on xml contents and re-evaluate arguments + + // If Args2Container is valid, then eval args for all content cla2's + argv = this->fillCmdLineArgsFromXML(file); + // Now check whether any merging is needed + if (m_xmlOpt != FILEONLY) { + //std::cout << "I am processing non fileonly option " << m_xmlOpt << std::endl; + this->init(); + for (Args2Container::const_iterator ait = this->begin(); ait != this->end(); ++ait) { + if ( cliarg=="ALL" || (*ait)->name()==cliarg) { + //bool disregardMe = (*ait)->evalArgs(argv); + (*ait)->evalArgs(argv); + } + } + + // Step 4 : put the markers plus contents for the xml evalArgs into a map + //for (int l = 0; l < argv.size(); ++l) std::cout << l << "::" << argv[l] << std::endl; + + std::map<std::string,std::vector<std::string> > xmlArgFrags; + for (Args2Container::const_iterator cl_it = this->begin(); cl_it != this->end(); ++cl_it) { + for ( CmdLineArgs2::iterator it = 
(*cl_it)->begin(); it != (*cl_it)->end(); ++it) { + std::string marker = it->first; + std::vector<std::string> list; + for (int index = it->second.first; index < it->second.second; ++index) { + list.push_back(argv[index]); + //std::cout << "In xmlarg loop : index " << index << " : " << argv[index] << std::endl; + } + xmlArgFrags.insert(std::make_pair(marker,list)); + } + } + + // Step 5 : using config parameters merge the xml and cli map contents into a new argv + + std::vector<std::string> argv_new; + // Loop over command line map + for (std::map<std::string,std::vector<std::string> >::iterator it = cliArgFrags.begin(); + it != cliArgFrags.end(); ++it) { + std::map<std::string,std::vector<std::string> >::iterator fit = xmlArgFrags.find(it->first); + // if ovveride, then replace xml map with commandline map + if (fit != xmlArgFrags.end() && m_xmlOpt == OVERRIDE) fit->second = it->second; + // if append, then add commandline args to xml args + if (fit != xmlArgFrags.end() && m_xmlOpt == APPEND) { + for (unsigned int j = 0; j < it->second.size(); ++j) + fit->second.push_back( (it->second)[j] ); + } + } + // Now take xmlArgFrags as the merged map and rebuild the argv from its contents + argv_new.push_back(exeName); + for (std::map<std::string,std::vector<std::string> >::iterator it =xmlArgFrags.begin(); + it != xmlArgFrags.end(); ++it) { + argv_new.push_back(it->first); + for (unsigned int j = 0; j < it->second.size(); ++j) argv_new.push_back( (it->second)[j] ); + } + argv = argv_new; + //std::cout << "Now dump merged argv" << std::endl; + //for (unsigned int j = 0; j < argv.size(); ++j) std::cout << " >> " << argv[j] << std::endl; + } + } + + // POST ARGV MANIPULATION + + // If Args2Container is valid, then eval args for all content cla2's + this->init(); + //for (int l = 0; l < argv.size(); ++l) std::cout << l << " | " << argv[l] << std::endl; + for (Args2Container::const_iterator ait = this->begin(); ait != this->end(); ++ait) { + if ( cliarg=="ALL" || 
(*ait)->name()==cliarg) { + if ( (*ait)->evalArgs(argv) == false ) { + m_log << coral::Error << "Unable to eval args for " << (*ait)->name() << corENDL; + return false; + } + } + } + for( Args2Container::const_iterator ait = begin(); ait != end(); ++ait ) { + if ( cliarg=="ALL" || (*ait)->name()==cliarg) { + if ( (*ait)->evalArgsPass2(argv, *this) == false ) { + m_log << coral::Error << "Unable to eval args (pass2) for " << (*ait)->name() << corENDL; + return false; + } + } + } + + // If output xml is requested ... + // Note that it always contains what was finally evaluated, NOT just the command line + if (doXML()) { + try { + XMLPlatformUtils::Initialize(); + } + catch( const XMLException& ) { + m_log << coral::Error << "Error during initialization! :" << corENDL << corENDL; + } + writeXMLContent(argv); + } + return true; +} + +// +// Evaluate the argv vector for ALL contents +// +bool Args2Container::evalArgs(std::vector<std::string>& argv) +{ + // MN: this is a hack to allow users skip the pesky RootCollection type specifier + + // don't try iterators, they are invalidated by insertion + bool edited = false; + for( size_t i=0; i<argv.size(); ) { + if( argv[i] == "-src" || argv[i] == "-dst" ) { + ++i; + while( i < argv.size() && argv[i][0] != '-' ) { + size_t len = argv[i].length(); + if( len>5 && argv[i].substr(len-5,5) == ".root" ) { + if( i+1 == argv.size() || (argv[i+1] != "RootCollection" && argv[i+1] != "PHYSICAL_NAME" && argv[i+1] != "LOGICAL_NAME") ) { + // add the missing "RootCollection" + argv.insert( argv.begin()+i+1, "RootCollection" ); + edited = true; + if( argv[i].substr(0,4) != "PFN:" ) { + // and cut off the .root extension + argv[i] = argv[i].substr(0,len-5); + } + } + } + i++; + } + } else i++; + } + + if( edited ) { + m_log << coral::Warning << "Fixed command line options for RootCollections. 
New arguments are: " << corENDL << "> "; + size_t i=0; + while( i < argv.size() ) { + m_log << coral::Warning << argv[i] << " " ; ++i; + } + m_log << coral::Warning << corENDL; + } + + if( !evalArgs("ALL", argv) ) return false; + m_argv = argv; + + QualList all_opts; + for( Args2Container::const_iterator cmdit = begin(); cmdit != end(); ++cmdit ) { + all_opts.insert( (*cmdit)->getArgQuals().begin(), (*cmdit)->getArgQuals().end() ); + } + for( CmdLineArgs2::const_iterator oi = m_a2c_cla2.begin(); oi != m_a2c_cla2.end(); ++oi ) { + if( all_opts.find( oi->first ) == all_opts.end() ) { + m_log << coral::Error << "Unknown option! " << oi->first << endl<< corENDL; + printHelp( false ); + return false; + } + } + return true; +} + + +bool Args2Container::evalArgs( int argc, const char *argv[] ) +{ + m_argv.resize( argc ); + for( int i=0; i<argc; ++i ) + m_argv[i] = argv[i]; + return evalArgs( m_argv ); +} + + +#include <boost/scoped_array.hpp> + +typedef std::basic_string<XMLCh> XercesString; + +// Converts from a narrow-character string to a wide-character string. +inline XercesString fromNative(const char* str) +{ + boost::scoped_array<XMLCh> ptr( XMLString::transcode(str) ); + return XercesString(ptr.get( )); +} + +// Converts from a narrow-character string to a wide-charactr string. +inline XercesString fromNative(const std::string& str) +{ + return fromNative(str.c_str( )); +} + +// Converts from a wide-character string to a narrow-character string. +inline std::string toNative(const XMLCh* str) +{ + if( !str ) return ""; + boost::scoped_array<char> ptr( XMLString::transcode(str) ); + return std::string(ptr.get( )); +} + +// Converts from a wide-character string to a narrow-character string. 
+inline std::string toNative(const XercesString& str) +{ + return toNative(str.c_str( )); +} + + + + + +void Args2Container::writeXMLContent(std::vector<std::string>& argv) +{ + // Part 1 - was defineXMLContent() + + // Initialaize to zero + DOMDocument* newDocument = 0; + try { + // Create empty document based on Args2Container.dtd + const XMLCh ls_id [] = {chLatin_L, chLatin_S, chNull}; + DOMImplementation* impl(DOMImplementationRegistry::getDOMImplementation (ls_id)); + DOMDocumentType* Arg2Type = impl->createDocumentType(XMLString::transcode("ArgList"), + XMLString::transcode(""), + XMLString::transcode("") ); + newDocument = impl->createDocument(0, XMLString::transcode("ArgList"), Arg2Type); + + // Create the base element, assign the id, and add to tree + DOMElement* toolBase = newDocument->createElement(XMLString::transcode("ToolInfo")); + toolBase->setAttribute(XMLString::transcode("toolID"), + XMLString::transcode(m_name.c_str())); + time_t tm = time(NULL); + char * date = asctime(localtime(&tm)); + toolBase->setAttribute(XMLString::transcode("date"), + XMLString::transcode(date)); + newDocument->getDocumentElement()->appendChild(toolBase); + + // Loop over container contents + for (Args2Container::const_iterator cmdit = this->begin(); cmdit != this->end(); ++cmdit) + { + // Loop over ArgQual's in each CLA2 + for (QualList::const_iterator it = (*cmdit)->getArgQuals().begin(); + it != (*cmdit)->getArgQuals().end(); + ++it) + { + // Get the pointer to the CLA2 + CmdLineArgs2* ptr = (*cmdit); + if (ptr->name() != "local") { + // Get pointer to the data for the qualifier + CmdLineArgs2::const_iterator ptr_it = ptr->find(it->first); + int first=0,last=0; + // Second part of iterator is pair of endpoints for the data in the argv + if (ptr_it != ptr->end()) {first=ptr_it->second.first;last=ptr_it->second.second;} + else continue; + // Fill a stringstream with everything in the argv for this qualifier + std::stringstream argString; + for (int j=first; j<last; ++j) 
{ + argString << argv[j]; + if (j < (last - 1) ) argString << " "; + } + // Create an element for the CLA2 and append it to the toolBase + DOMElement* cliArg = newDocument->createElement(XMLString::transcode("CliArg")); + toolBase->appendChild(cliArg); + // Then set the attribute data name,option + // and create a child text node with the data for that option + cliArg->setAttribute(XMLString::transcode("name"), + XMLString::transcode((*cmdit)->name().c_str())); + cliArg->setAttribute(XMLString::transcode("option"), + XMLString::transcode(it->first.c_str())); + // Check whether argument takes data + if (it->second.argsize) { + if (m_ver==V1) { + m_log << coral::Info << "using compatibility for xml" << corENDL; + cliArg->appendChild( + newDocument->createTextNode(XMLString::transcode(argString.str().c_str())) + ); + } + else { + for (int j=first; j<last; ++j) { + //std::cout << it->first << " : " << j << " : " << argv[j] << std::endl; + DOMElement* datum = newDocument->createElement(XMLString::transcode("datum")); + cliArg->appendChild(datum); + datum->appendChild( + newDocument->createTextNode(XMLString::transcode(argv[j].c_str())) + ); + } + } // version check + } + } + } // QualList iterator + } // Args2Container iterator + } + catch (...) { + m_log << coral::Error << "Problem in defineXMLContent" << corENDL; + // AV Would it not be better to reset newDocument=0 and exit? 
+ } + + // If the document could not be created, exit + if (!newDocument) { + m_log << coral::Error << "Unable to write XML" << corENDL; + return; + } + + // Part 2 - was writeXML() + try { + // get a writer instance + const XMLCh ls_id [] = {chLatin_L, chLatin_S, chNull}; + DOMImplementation* impl(DOMImplementationRegistry::getDOMImplementation (ls_id)); + +#if XERCES_VERSION_MAJOR < 3 + DOMWriter *theSerializer = ((DOMImplementationLS*)impl)->createDOMWriter(); + + if (theSerializer->canSetFeature(XMLUni::fgDOMWRTDiscardDefaultContent, true)) + theSerializer->setFeature(XMLUni::fgDOMWRTDiscardDefaultContent, true); + + if (theSerializer->canSetFeature(XMLUni::fgDOMWRTFormatPrettyPrint, true)) + theSerializer->setFeature(XMLUni::fgDOMWRTFormatPrettyPrint, true); + + if (theSerializer->canSetFeature(XMLUni::fgDOMWRTBOM, true)) + theSerializer->setFeature(XMLUni::fgDOMWRTBOM, true); + + // set a target as the file argument + std::string file(m_xFileName+".xml"); + m_log << coral::Debug << "About to write file " << file << coral::MessageStream::endmsg; + XMLFormatTarget* myFormTarget = new LocalFileFormatTarget(file.c_str()); + + // write document to target + theSerializer->writeNode(myFormTarget, *newDocument); + + // clean up the mess + if (theSerializer!=NULL && theSerializer!=0) delete theSerializer; + if (myFormTarget!=NULL && myFormTarget!=0) delete myFormTarget; + if (newDocument!=NULL && newDocument!=0) delete newDocument; +#else + DOMLSSerializer *theSerializer = ((DOMImplementationLS*)impl)->createLSSerializer(); + + if (theSerializer->getDomConfig()->canSetParameter(XMLUni::fgDOMWRTDiscardDefaultContent, true)) + theSerializer->getDomConfig()->setParameter(XMLUni::fgDOMWRTDiscardDefaultContent, true); + + if (theSerializer->getDomConfig()->canSetParameter(XMLUni::fgDOMWRTFormatPrettyPrint, true)) + theSerializer->getDomConfig()->setParameter(XMLUni::fgDOMWRTFormatPrettyPrint, true); + + if 
(theSerializer->getDomConfig()->canSetParameter(XMLUni::fgDOMWRTBOM, true)) + theSerializer->getDomConfig()->setParameter(XMLUni::fgDOMWRTBOM, true); + + // set a target as the file argument + std::string file(m_xFileName+".xml"); + m_log << coral::Debug << "About to write file " << file << coral::MessageStream::endmsg; + XMLFormatTarget* myFormTarget = new LocalFileFormatTarget(file.c_str()); + + // write document to target + // See http://xerces.apache.org/xerces-c/program-dom-3.html + DOMLSOutput* theOutput = ((DOMImplementationLS*)impl)->createLSOutput(); + theOutput->setByteStream(myFormTarget); + theSerializer->write(newDocument, theOutput); + + // clean up the mess + if (theSerializer!=NULL && theSerializer!=0) delete theSerializer; + if (theOutput!=NULL && theOutput!=0) delete theOutput; + if (myFormTarget!=NULL && myFormTarget!=0) delete myFormTarget; + if (newDocument!=NULL && newDocument!=0) delete newDocument; +#endif + } + catch (const SAXException& e) { + m_log << coral::Error << "xml error: " << e.getMessage( ) << corENDL; + } + catch (const DOMException& e) { + m_log << coral::Error << "xml error: " << e.getMessage( ) << corENDL; + } + catch( const XMLException& e ) { + m_log << coral::Warning << "Failed writing XML options file: " << toNative(e.getMessage()) << coral::MessageStream::endmsg; + } +} + +//------------------------------------------------------------------------------ +// +// fillCmdLineArgsFromXML +// +// Given a file name it will try to read it as xml +// and return a vector<string> which can be used to replace the argv_v. +// DTD is implicit in code, but not checked explicitly. 
+// +//------------------------------------------------------------------------------ +std::vector<std::string> Args2Container::fillCmdLineArgsFromXML(std::string file) +{ + std::vector<std::string> args; + args.push_back(m_name); + + try { + XMLPlatformUtils::Initialize(); + } + catch( const XMLException& ) { + m_log << coral::Error << "fillCmdLineArgsFromXML: Error during initialization!" << corENDL; + return args; + } + + XercesDOMParser *parser = new XercesDOMParser; + parser->setValidationScheme(XercesDOMParser::Val_Auto); + parser->setDoNamespaces(false); + //DOMTreeErrorReporter *errReporter = new DOMTreeErrorReporter(); + //parser->setErrorHandler(errReporter); + + bool errorsOccured = false; + try + { + parser->parse(file.c_str()); + } + catch (const OutOfMemoryException&) + { + m_log << coral::Error << "OutOfMemoryException" << corENDL; + errorsOccured = true; + } + catch (const XMLException& e) + { + m_log << coral::Error << "An error occurred during parsing" << endl + << " Message: " << e.getMessage() << corENDL; + errorsOccured = true; + } + + catch (const DOMException& e) + { + const unsigned int maxChars = 2047; + XMLCh errText[maxChars + 1]; + + m_log << coral::Error << "\nDOM Error during parsing: '" << file << endl + << "DOMException code is: " << e.code << corENDL; + + if( DOMImplementation::loadDOMExceptionMsg(e.code, errText, maxChars) ) + m_log << coral::Error << "Message is: " << errText << corENDL; + + errorsOccured = true; + } + + catch (...) + { + m_log << coral::Error << "An error occurred during parsing\n " << corENDL; + errorsOccured = true; + } + + // if any errors just exit now + if (errorsOccured) return args; + + DOMDocument* doc = parser->getDocument(); + if( !doc ) { + m_log << coral::Error << "XML parser error? 
File=" << file << corENDL; + return args; + } + + // Look for ToolInfo elements + DOMNodeList* toolList = doc->getElementsByTagName(XMLString::transcode("ToolInfo")); + // If there is not exactly one exit now + if (toolList->getLength()!=1) { + m_log << coral::Error << "Wrong number of ToolInfo records" << corENDL; + return args; + } + // since there is only one, take the first item + DOMNode* toolInfo = toolList->item(0); + if (toolInfo!=NULL) { + DOMNamedNodeMap* toolAttrs=toolInfo->getAttributes(); + string toolID = toNative( toolAttrs->getNamedItem(XMLString::transcode("toolID"))->getNodeValue() ); + // check that the toolID matches the name, but don't make it fatal; + if (toolID!=m_name) { + m_log << coral::Warning << "XML toolID does not match this utility" << endl + << "toolID= " << toolID << corENDL; + } + // Loop over any children of the toolInfo + if(toolInfo->hasChildNodes()) { + DOMNodeList* cliArgList = toolInfo->getChildNodes(); + for (unsigned int i=0; i<cliArgList->getLength(); ++i) { + DOMNode* thisArg = cliArgList->item(i); + // Only process children that are CliArg's + if( toNative(thisArg->getNodeName()) == "CliArg" ) { + DOMNamedNodeMap* attrs = thisArg->getAttributes(); + // Oy Vay, can we make assigning a string any more complicated + string cla2Name = toNative( attrs->getNamedItem(XMLString::transcode("name"))->getNodeValue() ); + bool exists=false; + std::vector<CmdLineArgs2*>::const_iterator thisit = this->begin(); + while (thisit != this->end()) { + if ((*thisit)->name()==cla2Name) {exists=true; break;} + ++thisit; + } + if( exists ) { + string option = toNative( attrs->getNamedItem(XMLString::transcode("option"))->getNodeValue() ); + args.push_back(option); + // Check whether the node has any text associated with it + // Treat the first node as that text -- should be true + if( thisArg->hasChildNodes() ) { + DOMNodeList* data = thisArg->getChildNodes(); + if( data->getLength() == 1 && data->item(0)->getNodeValue() != 0 ) { + // Old 
format with single text value for a cliarg + string optionText = toNative( thisArg->getFirstChild()->getNodeValue() ); + //std::cout << "Found text node with value " << optionText << std::endl; + //break it apart if it is a multiarg, otherwise take as in quotes text + if ( (*thisit)->getArgQual(option).multiarg==false ) { + args.push_back(optionText); + } + else { + std::vector<std::string> tokens; + Tokenize(optionText,tokens); + for (std::vector<std::string>::iterator tit=tokens.begin(); + tit != tokens.end(); + ++tit) + args.push_back(*tit); + } + } + else { + // New format with multiple datum elements, each of which is an arg + for (unsigned int i=0; i<data->getLength(); ++i) { + if( toNative( data->item(i)->getNodeName() ) == "datum" ) { + if( data->item(i)->hasChildNodes() ) { + args.push_back( toNative( data->item(i)->getFirstChild()->getNodeValue()) ); + } + else + m_log << coral::Error << "Found empty/bad datum" << corENDL; + } + } + } + } + } + else + m_log << coral::Warning << "Class " << cla2Name << " is not used for " << m_name << corENDL; + } + } + } + } + + // trimming whitespaces from the front and the back of all values + std::vector<string>::iterator arg = args.begin(), end = args.end(); + for( ; arg != end; ++arg ) { + while( (*arg)[0] == ' ' || (*arg)[0] == '\n' || (*arg)[0] == '\t' ) + arg->replace(0,1,""); + while(true) { + char last = (*arg)[arg->size()-1]; + if( last == ' ' || last == '\n' || last == '\t' ) + arg->replace(arg->size()-1,1,""); + else + break; + } + } + return args; +} + diff --git a/Database/APR/CollectionUtilities/src/Args2Container.dtd b/Database/APR/CollectionUtilities/src/Args2Container.dtd new file mode 100644 index 0000000000000000000000000000000000000000..24ef8df732b8152be18389fdfbc79a53cf4bd4bf --- /dev/null +++ b/Database/APR/CollectionUtilities/src/Args2Container.dtd @@ -0,0 +1,41 @@ +<!-- Args2Container.dtd --> +<!-- Jack.Cranshaw@cern.ch --> + +<!-- Example + +<?xml version="1.0" encoding="UTF-8" standalone="no" 
 ?>
<!DOCTYPE ArgList>
<ArgList>

  <ToolInfo date="Mon May 24 18:13:30 2010
" toolID="CollAppend.exe">
    <CliArg name="QueryInfo" option="-query">
      <datum>RunNumber=153200 AND (bitand(L1PassedTrigMaskTAV7, power(2,36)) = 0)</datum>
    </CliArg>
    <CliArg name="DstInfo" option="-dst">
      <datum>PFN:testmulti5.root</datum>
      <datum>RootCollection</datum>
    </CliArg>
    <CliArg name="SrcInfo" option="-src">
      <datum>data10_7TeV_debugrec_hltacc_f249_m455_READ</datum>
      <datum>RelationalCollection</datum>
    </CliArg>
    <CliArg name="SrcInfo" option="-srcconnect">
      <datum>oracle://tagspic.pic.es/ATLAS_TAGS_DATA_F_2010</datum>
    </CliArg>
    <CliArg name="MetaInfo" option="-merge"/>
  </ToolInfo>

</ArgList>

-->

<!ELEMENT ArgList (ToolInfo+)>
<!ELEMENT ToolInfo (CliArg*)>
<!ATTLIST ToolInfo
          date CDATA #IMPLIED
          toolID ID #REQUIRED>
<!ELEMENT CliArg (datum*)>
<!ATTLIST CliArg
          name CDATA #REQUIRED
          option CDATA #REQUIRED>
<!ELEMENT datum ANY>
diff --git a/Database/APR/CollectionUtilities/src/CatalogInfo.cpp b/Database/APR/CollectionUtilities/src/CatalogInfo.cpp new file mode 100755 index 0000000000000000000000000000000000000000..e07ae71f3d17ecba953a8f711a00c64d9f33f752 --- /dev/null +++ b/Database/APR/CollectionUtilities/src/CatalogInfo.cpp @@ -0,0 +1,133 @@
/*
  Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
*/

#include "CollectionUtilities/CatalogInfo.h"
#include "CollectionBase/ICollectionService.h"
#include "FileCatalog/IFileCatalog.h"
#include "FileCatalog/URIParser.h"
#include "CoralBase/MessageStream.h"

/**********************************************************

CatalogInfo is a concretization of a CmdLineArgs2
   to contain catalog info.
+ +Qualifiers: -ccread, -fcread + +**********************************************************/ + +#include <iostream> +using namespace std; +using namespace pool; + +CatalogInfo::CatalogInfo( ) : CmdLineArgs2("CatalogInfo"),m_valid(false) +{ + QualList markers; + markers.insert( std::make_pair("-usecatalog", ArgQual(0)) ); + markers.insert( std::make_pair("-ccread", ArgQual(1,true)) ); + markers.insert( std::make_pair("-fcread", ArgQual(1,true)) ); + markers["-usecatalog"].desc << "flag of whether to use collection catalog (DEFAULT=false)"; + markers["-ccread"].desc << "read collection catalog connection string (DEFAULT = \"\")"; + markers["-fcread"].desc << "read file catalog connection string (DEFAULT = \"\")"; + setArgQuals(markers); +} + + +bool +CatalogInfo::evalArgs(std::vector<std::string>& argv) +{ + bool retc = true; + retc = this->CmdLineArgs2::evalArgs(argv); + if (!retc) return retc; + + if ( this->hasQual("-usecatalog") ) { + m_valid=true; + } + + if ( m_valid && this->hasQual("-ccread") ) { + int ifirst = (*this)["-ccread"].first; + int ilast = (*this)["-ccread"].second; + int i = ifirst; + while ( i < ilast ) { + m_collCatalogReadVec.push_back( argv[i] ); + ++i; + } + } + + if ( this->hasQual("-fcread") ) { + int ifirst = (*this)["-fcread"].first; + int ilast = (*this)["-fcread"].second; + int i = ifirst; + while ( i < ilast ) { + m_fileCatalogReadVec.push_back( argv[i] ); + ++i; + } + } + + return retc; + +} + + +std::string +CatalogInfo::collCatalogN(unsigned int ind) +{ + if ( ind < m_collCatalogReadVec.size() ) + return m_collCatalogReadVec[ind]; + else { + std::cerr << "Out of range request for collCatConnect" << std::endl; + return std::string("BAD"); + } +} + + +std::string +CatalogInfo::fileCatalogN(unsigned int ind) +{ + if ( ind < m_fileCatalogReadVec.size() ) + return m_fileCatalogReadVec[ind]; + else { + std::cerr << "Out of range request for fileCatConnect" << std::endl; + return std::string("BAD"); + } +} + + +void 
+CatalogInfo::setCatalogs( pool::ICollectionService *service ) +{ + coral::MessageStream log( name() ); + for( size_t i=0; i<m_collCatalogReadVec.size(); i++ ) { + service->addReadCatalog( m_collCatalogReadVec[i] ); + log << coral::Debug + << " adding collection catalog: " << m_collCatalogReadVec[i] << coral::MessageStream::endmsg; + } +} + + + +void +CatalogInfo::setFileCatalogs( pool::IFileCatalog *fc ) +{ + coral::MessageStream log( name() ); + size_t en = m_fileCatalogReadVec.size(); + if( !en ) { + pool::URIParser parser; + parser.parse(); + fc->addReadCatalog(parser.contactstring()); + } else { + for( size_t i=0; i<en; i++ ) { + fc->addReadCatalog( m_fileCatalogReadVec[i] ); + log << coral::Debug + << " adding file catalog: " << m_fileCatalogReadVec[i] << coral::MessageStream::endmsg; + } + } + fc->connect(); +} + + + + + + diff --git a/Database/APR/CollectionUtilities/src/CmdLineArgs2.cpp b/Database/APR/CollectionUtilities/src/CmdLineArgs2.cpp new file mode 100644 index 0000000000000000000000000000000000000000..db4a2b7c03707685f5992b61b5431fe4e1b414db --- /dev/null +++ b/Database/APR/CollectionUtilities/src/CmdLineArgs2.cpp @@ -0,0 +1,207 @@ +/* + Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration +*/ + +#include <string> +#include <vector> +#include <algorithm> +#include <map> +#include "CollectionUtilities/ArgQual.h" +#include "CollectionUtilities/CmdLineArgs2.h" + +/********************************************************** + +CmdLineArgs2 is an extension of a map intended to map + qualifiers (e.g. -help) of a CLI to the index of + the argv[]. It's state is determined by a list of + ArgQual's/QualList. It then uses evalArgs to apply + those qualifiers to an argv[]. 
+ + - Qualifier -help is added in constructor + - methods described in comments in definition + +Example of Usage: + + Args2Container argsVec(thisProgram); + + QualList markers; + markers.insert( make_pair("-somequal", ArgQual()) ); + markers["-somequal"].desc << "this is a qualifier with default=this"; + + CmdLineArgs2 cmdLineArgs; + cmdLineArgs.setArgQuals(markers); + cmdLineArgs.evalArgs(argc,argv); + argsVec.push_back(&cmdLineArgs); // Add it to the list + + ... + +**********************************************************/ +using namespace std; +using namespace pool; + +#include <iostream> + +CmdLineArgs2::CmdLineArgs2( std::string name ) + : m_argsfine( false ), + m_ignoreMissingArgs( false ), + m_id(name) +{ + initTypes(); +} + + +CmdLineArgs2::CmdLineArgs2( QualList& quals, std::string name ) + : m_argsfine( false ), + m_ignoreMissingArgs( false ), + m_quals(quals), + m_id(name) +{ + initTypes(); +} + +void +CmdLineArgs2::initTypes() +{ + // Is there a way to do this with static something? 
+ m_allowedTypes.push_back("RelationalCollection"); + m_allowedTypes.push_back("RootCollection"); + m_allowedTypes.push_back("PHYSICAL_NAME"); + m_allowedTypes.push_back("LOGICAL_NAME"); + m_allowedTypes.push_back("GUID"); +} + + +bool +CmdLineArgs2::hasQual( const std::string arg ) const +{ + return ( find(arg) != end() ); +} + + +bool +CmdLineArgs2::getOpt( const std::string opt, std::string &optval ) const +{ + const_iterator i = find(opt); + if( i == end() ) + return false; + optval = m_argv[i->second.first]; + return true; +} + +bool +CmdLineArgs2::getOpt( const std::string opt, int &optval ) const +{ + const_iterator i = find(opt); + if( i == end() ) + return false; + istringstream str( m_argv[i->second.first] ); + str >> optval; + return true; +} + +bool +CmdLineArgs2::getOpts( const std::string opt, std::vector<std::string> &optvalvec ) const +{ + const_iterator iter = find(opt); + if( iter == end() ) + return false; + optvalvec.clear(); + for( int i = iter->second.first; i<iter->second.second; i++ ) { + optvalvec.push_back( m_argv[i] ); + } + return true; +} + + +bool +CmdLineArgs2::evalArgs(std::vector<std::string>& argv) +{ + m_argv = argv; + m_argsfine = true; + int argc=argv.size(); + // Markers are info stored in the base class map + // Look for markers (anything beginning with '-') in argv and record their position + vector< std::pair<int, string> > markPos; + vector<string> marks; + for (int i=1; i<argc; ++i) { + if (argv[i][0]=='-') { + markPos.push_back(std::make_pair(i,argv[i])); + marks.push_back(argv[i]); + } + } + + // sort the marks by their position + std::sort(markPos.begin(),markPos.end()); + + // Check for required args + for( QualList::iterator quit = m_quals.begin(); quit != m_quals.end(); ++quit ) { + if( quit->second.required && + std::find(marks.begin(),marks.end(),quit->first) == marks.end() ) + { + if( !m_ignoreMissingArgs ) + std::cerr << "Must specify value for " << quit->first << std::endl; + m_argsfine = false; + } + } + + // 
build a map of the begin,end positions for all marks + vector< std::pair<int, string> >::iterator itmarks=markPos.begin(); + while (itmarks != markPos.end()) { + std::string markerName = itmarks->second; + int first = itmarks->first+1; + ++itmarks; + int last = (itmarks==markPos.end())? argc : itmarks->first; + insert( make_pair( markerName, make_pair(first,last) ) ); + } + + // Now check that they obey the rules and set flag accordingly + iterator mit = begin(); + while( mit != end() ) { + string qual = mit->first; + // check only options that are in the qual list + if( m_quals.find(qual) != m_quals.end() ) { + int npars = m_quals[qual].argsize; + int diff = (*this)[qual].second - (*this)[qual].first; + if (m_quals[qual].multiarg) { + if ( abs(diff) < npars || diff%npars != 0) { + std::cerr << endl << " ERROR Wrong number of parameters" << std::endl; + std::cerr << qual << " needs " << npars << " parameters" + << ", Found " << diff << std::endl; + std::cerr << "MORE INFO" << std::endl; + m_quals[qual].print(std::cerr); + std::cerr << std::endl; + m_argsfine = false; + } + } + else { + if (abs(diff)!=npars) { + std::cerr << endl << " ERROR Wrong number of parameters" << std::endl; + std::cerr << qual << " needs " << npars << " parameters" + << ", Found " << diff << std::endl; + std::cerr << "MORE INFO" << std::endl; + m_quals[qual].print(std::cerr); + std::cerr << std::endl; + m_argsfine = false; + } + } + } + //else std::cout << "CmdLineArgs: ERROR Unknown qualifier " << markerName << std::endl; + ++mit; + } + + return m_argsfine; +} + +unsigned int +CmdLineArgs2::nArgs(std::string name) +{ + if( !valid() ) { + std::cerr << "nArgs called for invalid CmdLineArgs2 class" << std::endl; + return 0; + } + CmdLineArgs2::iterator it = find(name); + if( it == end() ) + return 0; + return abs(it->second.second - it->second.first)/m_quals[name].argsize; +} + diff --git a/Database/APR/CollectionUtilities/src/CollAppendBase.cpp 
b/Database/APR/CollectionUtilities/src/CollAppendBase.cpp new file mode 100755 index 0000000000000000000000000000000000000000..6b9808675ece480debaad15b99ee6549c468e61d --- /dev/null +++ b/Database/APR/CollectionUtilities/src/CollAppendBase.cpp @@ -0,0 +1,777 @@ +/* + Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration +*/ + +/** + * @file CollAppend.cpp + * @brief Base class for utilities that copy or merge POOL collections + * @author Marcin.Nowak@cern.ch + * $Id: + */ + +#include "CollectionUtilities/CollAppendBase.h" +#include "CollectionUtilities/MetaInfo.h" +#include "CollectionUtilities/MaxEventsInfo.h" + +#include "CoralBase/Attribute.h" + +#include "POOLCore/Exception.h" +#include "POOLCore/boost_tokenizer_headers.h" + +#include <ctime> +#include <sstream> +#include <iostream> + +#include <memory> +//# define AUTO_PTR std::unique_ptr +#define AUTO_PTR auto_ptr + +#define corENDL coral::MessageStream::endmsg + +using namespace std; +using namespace pool; + + +CollAppendBase::CollAppendBase(std::string name) : + m_thisProgram( name ), + m_provName("SourceFID_ref"), + m_provCLID("6F6A12A0-FEEF-484B-9691-94B82B90CDBA"), + m_provCollExists(false), + m_noAttrib(false), + m_numEvents(-1), + m_numEventsPerCommit(static_cast<unsigned int>(-1)), + m_numEventsPerPrint(-1), + m_numRowsCached(0), + m_extendProv(false), + m_ignoreSchemaDiff( false ), + m_evtCounterTotal( 0 ), + m_committed( false ), + m_collectionService(new pool::CollectionService()), + m_log( name ), + m_argsVec( name, true, &m_log ), + m_initialized( false ) +{ + m_metainfo = new MetaInfo(); + + // attempt to shut up Coverity, MN + time(&m_starttime); + time(&m_endtime); +} + +CollAppendBase::~CollAppendBase() +{ + delete m_metainfo; +} + +void CollAppendBase::setMetaInfo( MetaInfo* minfo ) +{ + if( m_initialized ) { + m_log << coral::Error + << "CollAppendBase::setMetaInfo() can not be used after init()" + << coral::MessageStream::endmsg; + exit(51); + } + delete m_metainfo; + 
m_metainfo = minfo; +} + + +int +CollAppendBase::execute( std::vector<std::string> argv_v ) +{ + // Primary try block + try { + if( !init( argv_v ) ) return 3; + + time(&m_starttime); + copyData(); + readMetadata(); + time(&m_endtime); + + addMetadata( ); + writeMetadata(); + + closeCollections(); + finalize(); + return 0; + } + catch( pool::Exception& poolException ) + { + if( m_committed ) cout << "At least one COMMIT has occurred" << endl; + std::cerr << "pool::Exception: " << poolException.what() << std::endl;; + return 1; + } + catch( std::exception& exception ) + { + if( m_committed ) cout << "At least one COMMIT has occurred" << endl; + std::cerr << "std::exception: " << exception.what() << std::endl;; + return 1; + } + catch( ... ) + { + if( m_committed ) cout << "At least one COMMIT has occurred" << endl; + std::cerr << "Unknown exception caught in main()." << std::endl;; + return 1; + } +} + + + +// Loop over destination collections and check if they exist +void +CollAppendBase::chkExistingDst(vector<bool>& existVec) +{ + // initialize the exist vector + existVec.resize( m_dstinfo.nDst() ); + + for( unsigned int i=0; i < m_dstinfo.nDst(); ++i ) { + m_log << "Testing collection '" << m_dstinfo.name(i) + << "' of type " << m_dstinfo.type(i) << coral::MessageStream::endmsg; + // testing for collection existence + try { + bool readOnly( true ); + AUTO_PTR<pool::ICollection> dstCollection( m_collectionService->handle( m_dstinfo.name(i), m_dstinfo.type(i), m_dstinfo.connect(), readOnly ) ); + + AUTO_PTR<pool::ICollectionQuery> dquery( dstCollection->newQuery() ); + pool::ICollectionCursor& cursor = dquery->execute(); + if( !cursor.next() ) { + m_log << coral::Info + << "Destination collection `" + m_dstinfo.name(i) + "' is empty." 
+ << coral::MessageStream::endmsg; + } + existVec[i] = true; + if( m_metainfo->copyMode() != ICollMetaHandler::merge ) { + m_log << coral::Error << "Destination collection '" << m_dstinfo.name(i) + << "' exists, and the copy mode was not 'merge'. " + << "Remove the existing collection first" + << coral::MessageStream::endmsg; + exit(1); + } + } + catch ( pool::Exception& ) { + if( m_metainfo->copyMode() == ICollMetaHandler::merge ) + m_log << coral::Warning << "Destination Collection `" << m_dstinfo.name(i) + << "' does not exist; A new one will be created; " << corENDL; + existVec[i] = false; + } + } +} + + +// Open all destination collections, creating them if not existing +std::vector< pool::ICollection* > +CollAppendBase::openDestCollections( pool::CollectionDescription& destDescription ) +{ + vector<bool> destCollExistVec; + chkExistingDst( destCollExistVec ); + + vector< ICollection* > destCollections( m_dstinfo.nDst() ); + for( unsigned int i=0; i<m_dstinfo.nDst(); i++ ) { + if( destCollExistVec[i] ) { + // open existing + m_progress.print("Opening destination collections", 100*i/m_dstinfo.nDst()); + destCollections[i] = openDestCollection( m_dstinfo.name(i), + m_dstinfo.type(i), + m_dstinfo.connect() ); + // Check whether an opened dst collection has an appropriate description + if( !destDescription.isSubsetOf( destCollections[i]->description() ) ) { + m_log << coral::Error << " Destination collection '" + << destCollections[i]->description().name() + << "' does not have all the required columns to perform append" + << " (or column types do not match)" << corENDL; + exit(1); + } + } else { + // create missing destination collections + m_progress.print("Creating destination collections", 100*i/m_dstinfo.nDst()); + destDescription.setName( m_dstinfo.name(i) ); + destDescription.setType( m_dstinfo.type(i) ); + destDescription.setConnection( m_dstinfo.connect() ); + destCollections[i] = createDestCollection( destDescription ); + } + // set Row Cache size for 
ALL collections here + if( m_numRowsCached ) + destCollections[i]->dataEditor().setRowCacheSize( m_numRowsCached ); + } + return destCollections; +} + + + +pool::ICollection* +CollAppendBase::openDestCollection( const std::string& name, + const std::string& type, + const std::string& connect ) +{ + bool readOnly(false); + pool::ICollection* collection = m_collectionService->handle( name, type, connect, readOnly ); + if( m_catinfo.useCC() ) { + // Try to register it in the catalog + try { + m_log << coral::Debug + << "---- Registering destination collection " << name << corENDL; + m_collectionService->registerExisting( name, type, connect ); + }catch( pool::Exception& e) { + m_log << coral::Warning << "Registration of collection " + << name << " failed with error: " << e.what() << corENDL; + } + } + return collection; +} + + +pool::ICollection* +CollAppendBase::createDestCollection( const pool::ICollectionDescription& destDescription ) +{ + string name = destDescription.name(); + string type = destDescription.type(); + string connect = destDescription.connection(); + ICollection* collection = 0; + m_log << coral::Info << "Creating destination collection " << name + << " of type " << type << coral::MessageStream::endmsg; + try { + bool overwrite( false ); // don't overwrite existing collection + collection = m_collectionService->create( destDescription, overwrite ); + // created the collection - now try to register it + if(m_catinfo.useCC()) { + bool reg_ok = false; + m_log << coral::Debug + << "---- Registering destination collection " << name << corENDL; + try { + bool overwrite( true ); // overwrite existing catalog entry + reg_ok = m_collectionService->registerExisting( collection, overwrite, name ); + } catch(pool::Exception& e) { + m_log << coral::Error << e.what() << coral::MessageStream::endmsg; + } + if( !reg_ok ) { + m_log << coral::Warning << "Registration of destination collection " << name + << " FAILED!" 
<< coral::MessageStream::endmsg; + } + } + } catch( pool::Exception& ) { + m_log << coral::Info <<"Could not create destination collection " << name + << " - testing again if the collection exists already " << coral::MessageStream::endmsg; + if( m_collectionService->exists( name, type, connect ) ) { + m_log << coral::Info << "Opening destination collection " << name + << " in update mode" << coral::MessageStream::endmsg; + bool forUpdate(false); + collection = m_collectionService->handle( name, type, connect, forUpdate ); + } else { + throw; + } + } + return collection; +} + + + +// Create Description of the destination collection +// use only selected columns if this option was specified +// - updates m_removedTokens and m_provCollExists when processing provenance +pool::CollectionDescription +CollAppendBase::buildDstDesc(const pool::ICollectionDescription& sourceDesc + ,const pool::TokenList &tokens + ,const coral::AttributeList& attribs + ,const string queryopt + ) +{ + // find which will be the Primary Token in the output collection + typedef boost::tokenizer<boost::char_separator<char> > Tizer; + const boost::char_separator<char> sep(" ,"); + Tizer tizer( queryopt, sep ); + string destPrimaryTok; + int pos = 1, prev_pos = 1000000; + for( TokenList::const_iterator token = tokens.begin(); token != tokens.end(); ++token ) { + const string name( token.tokenName() ); + if( name == sourceDesc.eventReferenceColumnName() ) { + // dest Primary will be the same as source + destPrimaryTok = name; + break; + } + // check which posistion this token name has on the queryopt list, take the first + for( Tizer::iterator col = tizer.begin(); col != tizer.end(); ++col, ++pos ) { + if( *col == name ) { + if( pos < prev_pos ) { + prev_pos = pos; + destPrimaryTok = name; + } + break; + } + } + } + m_log << (destPrimaryTok==sourceDesc.eventReferenceColumnName()? 
coral::Debug : coral::Info ) + <<"Primary Token in the output collection will be '" << destPrimaryTok << "'" << corENDL; + + CollectionDescription destDescription( sourceDesc.name(), "destCollType" ); + for( TokenList::const_iterator token = tokens.begin(); token != tokens.end(); ++token ) { + string name( token.tokenName() ); + if( name == destPrimaryTok ) { + if( name == "Token" ) { + /* MN: hack - "Token" column name taken from RootCollection */ + // remap back the RootCollection default event ref name to the new default + name = pool::CollectionBaseNames::defaultEventReferenceColumnName(); + } else { + // source had user defined name, copy it + destDescription.setEventReferenceColumnName( name ); + } + } + const ICollectionColumn& column = sourceDesc.column( name ); + // Do not copy PROV columns if adding a new PROV columns + if( column.annotation().find("PROV") == string::npos || !m_extendProv ) { + destDescription.insertTokenColumn( name, column.annotation(), + column.collectionFragmentName() ); + } else { + m_log << coral::Debug << "Ignoring source collection provenance Token attribute: " + << name << corENDL; + m_removedTokens.insert( name ); + } + } + // add the new provenance Token + if( m_extendProv ) try { + destDescription.insertTokenColumn( m_provName, "PROV", + destDescription.collectionFragment(0).name() ); + m_provCollExists = true; + }catch( pool::Exception &e ) { + m_log << coral::Error << "Failed to add provenance Token attribute " << m_provName + << "; " << e.what() << corENDL; + } + // add new References from the -addattrib option + for( TokenList::const_iterator token = m_addedReferences.begin(); + token != m_addedReferences.end(); ++token ) { + destDescription.insertTokenColumn( token.tokenName() ); + } + + // add requested ATTRIBUTES + for( coral::AttributeList::const_iterator attrib = attribs.begin(); + attrib != attribs.end(); ++attrib ) { + const string& name = attrib->specification().name(); + const ICollectionColumn& column = 
sourceDesc.column( name ); + destDescription.insertColumn( name, column.type(), column.annotation(), + column.collectionFragmentName(), + column.maxSize(), column.sizeIsFixed() ); + } + // add new attributes from the -addattrib option + for( coral::AttributeList::const_iterator attrib = m_addedAttributes.begin(); + attrib != m_addedAttributes.end(); ++attrib ) { + const string& name = attrib->specification().name(); + const string& type = attrib->specification().typeName(); + const ICollectionColumn* column = destDescription.columnPtr(name); + if( !column ) { + // only add not existing columns + destDescription.insertColumn( name, type ); + } else { + if( type != column->type() ) { + string err = "Type mismatch between existing attribute '" + column->type() + " " + name + " and added attribute type: '" + type + "'"; + throw pool::Exception(err, "buildDstDesc", "CollAppend"); + } + } + } + + return destDescription; +} + + +/// open one of the source collectinos +/// * throws exceptions in case of failure +pool::ICollection* +CollAppendBase::openSrcCollection( const std::string& name, + const std::string& type, + const std::string& connect ) +{ + m_log << coral::Info + << "Opening source collection " << name << " of type " << type << corENDL; + bool readOnly( true ); + pool::ICollection *srcCollection = m_collectionService->handle( name, type, connect, readOnly ); + + //Try to register it in the catalog + if (m_catinfo.useCC()) { + try { + m_progress.print("Registering source collection", 0); + m_log << coral::Debug + << "---- Registering source collection " << name << corENDL; + m_collectionService->registerExisting( name, type, connect, name); + } catch(pool::Exception& e) { + m_log << coral::Info << " Source collection registration failed: " << e.what() + << corENDL; + } + } + + m_log << coral::Debug << "Opened source collection " << name << corENDL; + return srcCollection; +} + + +std::string +CollAppendBase::readCollectionGUID( pool::ICollection* collection ) +{ 
+ if( (!m_extendProv && m_metainfo->noMetadata() ) + || !collection->metadata().existsKey(CollectionBaseNames::CollIDMdataKey()) ) + return string("NOTFOUND") ; + string GUID = collection->metadata().getValueForKey(CollectionBaseNames::CollIDMdataKey()); + m_log << coral::Debug << "Collection " << collection->description().name() << " GUID= " + << GUID << corENDL; + return GUID; +} + + + +/// copy all data rows. +/// Opens source and destination collections, leaves them open and uncommitted +void +CollAppendBase::copyData() +{ + // loop over all source collections + size_t k=0; + CollectionDescription destDescription( m_srcinfo.name(0), "type" ); + m_log << coral::Info << "Number of input collections: " << m_srcinfo.nSrc() << corENDL; + for( unsigned int sCollN=0; sCollN < m_srcinfo.nSrc(); sCollN++ ) + { + m_progress.print("Opening source collection", 100*sCollN/m_srcinfo.nSrc()); + pool::ICollection *srcCollection = openSrcCollection( m_srcinfo.name(sCollN), m_srcinfo.type(sCollN), m_srcinfo.connect() ); + m_srcCollections.push_back( srcCollection ); + m_srcCountVec.push_back(0); + + m_log << "Creating query for the source collection" << coral::MessageStream::endmsg; + AUTO_PTR<ICollectionQuery> srcQuery( srcCollection->newQuery() ); + srcQuery->setRowCacheSize( 1000 ); + srcQuery->setCondition( m_queryinfo.query(sCollN) ); + if( m_queryinfo.queryOptions().size() ) { + // add attributes requested on the command line + srcQuery->addToOutputList( m_queryinfo.queryOptions() ); + // -noattrib option adds all tokens on top of that + if( m_noAttrib ) + srcQuery->selectAllTokens(); + srcQuery->skipEventReference(); + } else { + if( m_noAttrib ) + srcQuery->selectAllTokens(); + else + srcQuery->selectAll(); + } + m_log << "Executing query for the source collection" << coral::MessageStream::endmsg; + m_progress.print("Querying source collection", 0); + pool::ICollectionCursor& cursor = srcQuery->execute(); + + // Prepare destination collections at the first source 
iteration + if( sCollN == 0 ) { + destDescription = buildDstDesc( m_srcCollections[0]->description(), + cursor.currentRow().tokenList(), cursor.currentRow().attributeList(), + m_queryinfo.queryOptions() ); + m_destCollections = openDestCollections( destDescription ); + } else { + // Source Collection 2+ + // build attr list for input collection for check with destination + CollectionDescription desc( m_srcinfo.name(sCollN), "type" ); + desc = buildDstDesc( m_srcCollections[sCollN]->description(), + cursor.currentRow().tokenList(), cursor.currentRow().attributeList(), + m_queryinfo.queryOptions() ); + if( ! desc.isSubsetOf( destDescription ) ) { + m_log << coral::Error << "Source collection '" + << srcCollection->description().name() + << "' has attributes not existing in the destination collection and can not be copied." + << " EXITING" << corENDL; + exit(2); + } + // For each source collection get clean dest. coll. row buffers + for( unsigned int i=0; i<m_dstinfo.nDst(); i++ ) { + m_destCollections[i]->dataEditor().clearRowBuffers(); + } + } + + m_log << "Iterating over the source collection" << coral::MessageStream::endmsg; + long long evtCounter = 0; + const std::string& srcRefName = srcCollection->description().eventReferenceColumnName(); + m_provToken.setDb( readCollectionGUID(srcCollection) ); + size_t srcSize = 1; // the size of the query result set is not known - todo sometime + size_t progressStep = (srcSize>99) ? srcSize/100 : 1; + + // loop over input collection and copy all rows to all output collections + while( cursor.next() && (m_numEvents < 0 || m_numEvents > evtCounter) ) { + evtCounter++; + m_evtCounterTotal++; + if( evtCounter % progressStep ) + m_progress.print("Copying collection(s)", (long)(100*evtCounter/srcSize)); + + for( unsigned int i=0; i<m_dstinfo.nDst(); i++ ) + { + // Get row buffer for adding data to collection. 
+ CollectionRowBuffer &rowBuffer = m_destCollections[i]->dataEditor().rowBuffer(); + if( evtCounter == 1 ) { + // first row: + //cout << "XXXXXXXX recreating the buffer" << endl; + //cout << " rowBuffer: "; rowBuffer.attributeList().toOutputStream(cout); cout << endl; + //cout << " input row: "; cursor.currentRow().attributeList().toOutputStream(cout); cout << endl; + //cout << " new attribs: "; m_addedAttributes.toOutputStream(cout); cout << endl; + // insert default values for the new attributes requested with -addattr option + // use temporary attribute list to avoid changing the defaults through merge + coral::AttributeList newAttribsTmp( m_addedAttributes ); + rowBuffer.attributeList().merge( newAttribsTmp ); + + // add new References from the -addattrib option + for( TokenList::const_iterator token = m_addedReferences.begin(); + token != m_addedReferences.end(); ++token ) { + token->setData( &rowBuffer.tokenList()[ token.tokenName() ] ); + } + } + + // copy values from source + rowBuffer.attributeList().merge( cursor.currentRow().attributeList() ); + + // copy tokens + const std::string& dstRefName = m_destCollections[i]->description().eventReferenceColumnName(); + const TokenList& tokens = cursor.currentRow().tokenList(); + for( TokenList::const_iterator ti = tokens.begin(); ti != tokens.end(); ++ti ) { + if( !m_extendProv || m_removedTokens.find( ti.tokenName() ) == m_removedTokens.end() ) { + if( ti.tokenName() == srcRefName ) { + ti->setData( &rowBuffer.tokenList()[ dstRefName ] ); + } else { + ti->setData( &rowBuffer.tokenList()[ ti.tokenName() ] ); + } + } + } + + if( m_provCollExists ) { + m_provToken.setData( &rowBuffer.tokenList()[m_provName] ); + } + m_destCollections[i]->dataEditor().insertRow( rowBuffer ); + + if( !(m_evtCounterTotal % m_numEventsPerCommit) ) { + m_destCollections[i]->commit(); + m_committed = true; + } + + if ( (m_numEventsPerPrint >= 0) && !(m_evtCounterTotal % m_numEventsPerPrint) ) { + m_log << m_evtCounterTotal << " events 
appended" << corENDL; + } + } + } + if( k < m_srcCountVec.size() ) m_srcCountVec[k]=(long)evtCounter; + k++; + } +} + + +bool +CollAppendBase::init( std::vector<std::string> argv_v ) +{ + m_provToken.setCont( "POOLCollection" ); + m_provToken.setClassID( Guid(m_provCLID) ); + m_provToken.setTechnology( 0 ); + + m_argsVec.desc << "Takes one or more collections (of varying formats) and " + << "merges them and writes the results to one or more output " + << "collections (of varying formats). " << endl + << "Infile Metadata handling depends on -copy, -extract and -merge options." << endl; + + // list of CollAppend *specific* cli keys and their argument properties + QualList markers; + markers["-noattrib"] = ArgQual("Copy only Tokens. Attributes specified with -queryopt will still be copied as well", 0); + markers["-nevtpercommit"].desc << "[Max. number of events to process between " + << "collection commits (DEFAULT = -1)]"; + markers["-nevtperprint"].desc << "Number of events to process before " + << "each print of status to the screen."; + markers["-nevtcached"].desc << "size of the insert buffer for bulk operations " + << "DEFAULT = 0 (no bulk operations)"; + // markers["-ignoreschemadiff"].desc << "attempts to copy collections even if schema seems incompatible " + // << "(use at your own risk)"; + markers["-extendprov"].desc << "Add GUIDs of the input collections to the new Token attribute <arg>. " + << "If <arg> is \"default\" the default name will be used: " << m_provName ; + + markers["-addattr"] = ArgQual("Add new attributes to output collection", 3, true); + markers["-addattr"].desc << " in the form: NAME TYPE VALUE." 
+ << " accepted types: char,short,int,int64,uchar,ushort,uint,uint64,float,double,string,token"; + + CmdLineArgs2 cmdLineArgs; + cmdLineArgs.setArgQuals(markers); + m_argsVec.push_back(&cmdLineArgs); // Add it to the list + + // Classes with shared cli keys and their argument properties + // Add them to the list + m_argsVec.push_back(&m_catinfo); + m_argsVec.push_back(&m_queryinfo); + m_argsVec.push_back(&m_srcinfo); + m_argsVec.push_back(&m_dstinfo); + m_argsVec.push_back(m_metainfo); + m_argsVec.push_back(&m_progress); + MaxEventsInfo maxEvents; + m_argsVec.push_back(&maxEvents); + + // Check that all cmd line args are valid + if( !m_argsVec.evalArgs(argv_v) ) return false; + if( !m_argsVec.checkValid() ) return false; + + if( maxEvents.specified() ) + m_numEvents = maxEvents.get(); + m_noAttrib = cmdLineArgs.hasQual("-noattrib"); + //m_ignoreSchemaDiff = cmdLineArgs.hasQual("-ignoreschemadiff"); + cmdLineArgs.getOpt("-nevtpercommit", m_numEventsPerCommit); + cmdLineArgs.getOpt("-nevtcached", m_numRowsCached); + string val; + if( cmdLineArgs.getOpt("-nevtperprint", val) ) { + m_numEventsPerPrint = ( val=="all"? 
-1 : atoi(val.c_str()) ); + } + if( cmdLineArgs.getOpt("-extendprov", val) ) { + m_extendProv = true; + if( val != "default" ) + m_provName = val; // + "_ref"; + } + vector<string> addAttributesVec; + if( cmdLineArgs.getOpts("-addattr", addAttributesVec) ) { + for(size_t i=0; i<addAttributesVec.size(); i+=3) { + const string attrname = addAttributesVec[i]; + const string attrtype = addAttributesVec[i+1]; + istringstream val( addAttributesVec[i+2] ); + if( attrtype == "int" ) { + m_addedAttributes.extend(attrname, attrtype); + val >> m_addedAttributes[attrname].data<int>(); + } else if( attrtype == "uint" ) { + m_addedAttributes.extend(attrname, "unsigned int"); + val >> m_addedAttributes[attrname].data<unsigned int>(); + } else if( attrtype == "char" ) { + m_addedAttributes.extend(attrname, attrtype); + val >> m_addedAttributes[attrname].data<char>(); + } else if( attrtype == "uchar" ) { + m_addedAttributes.extend(attrname, "unsigned char"); + val >> m_addedAttributes[attrname].data<unsigned char>(); + } else if( attrtype == "short" ) { + m_addedAttributes.extend(attrname, attrtype); + val >> m_addedAttributes[attrname].data<short>(); + } else if( attrtype == "ushort" ) { + m_addedAttributes.extend(attrname, "unsigned short"); + val >> m_addedAttributes[attrname].data<unsigned short>(); + } else if( attrtype == "float" ) { + m_addedAttributes.extend(attrname, attrtype); + val >> m_addedAttributes[attrname].data<float>(); + } else if( attrtype == "double" ) { + m_addedAttributes.extend(attrname, attrtype); + val >> m_addedAttributes[attrname].data<double>(); + } else if( attrtype == "string" ) { + m_addedAttributes.extend(attrname, attrtype); + // val >> string stops at the first space! 
MN + m_addedAttributes[attrname].data<string>() = val.str(); + } else if( attrtype == "int64" ) { + m_addedAttributes.extend(attrname, "long long"); + val >> m_addedAttributes[attrname].data<long long>(); + } else if( attrtype == "uint64" ) { + m_addedAttributes.extend(attrname, "unsigned long long"); + val >> m_addedAttributes[attrname].data<unsigned long long>(); + } else if( attrtype == "token" ) { + m_addedReferences.extend(attrname); + if( val.str().length() > 1 ) { + m_addedReferences[attrname].fromString( val.str() ); + } + } else { + markers["-addattr"].print(std::cerr); + throw pool::Exception(string("Unsupported attribute type for -addattr option: ") + attrtype, + "init", "CollAppend"); + } + } + } + if( m_metainfo->copyMode() == ICollMetaHandler::copy && m_srcinfo.nSrc() > 1 ) { + m_log << coral::Error << "Copy mode: 'copy' supports only one source collection" << corENDL; + return false; + } + + m_catinfo.setCatalogs( m_collectionService ); + m_initialized = true; + return true; +} + + +void +CollAppendBase::readMetadata() +{ + m_progress.print("Metadata processing", 0); + if( m_metainfo->checkMetadata( m_srcCollections, m_destCollections ) ) { + // only read metadata if "check" did not report any conflicts in the dest collections + m_metainfo->readMetadata( m_srcCollections ); + } +} + + +void +CollAppendBase::addMetadata( ) +{ + // add extract information + if( !m_metainfo->noMetadata() && m_metainfo->copyMode() == ICollMetaHandler::extract ) { + string user; if (getenv("USER")!=NULL) user = getenv("USER"); + string host; if (getenv("HOST")!=NULL) host = getenv("HOST"); + string userhost = user + "@" + host; + stringstream stimestr, etimestr; + + std::string inputCollections = m_srcinfo.name(0); + for( size_t i=1; i<m_srcinfo.nSrc(); i++ ) + inputCollections += ":"+m_srcinfo.name(i); + std::string outputCollections = m_dstinfo.name(0); + for( size_t i=1; i<m_dstinfo.nDst(); i++ ) + outputCollections += ":"+m_dstinfo.name(i); + stimestr << m_starttime; 
etimestr << m_endtime; + + m_metainfo->addMetaEntry( "TimeBegin", stimestr.str() ); + m_metainfo->addMetaEntry( "TimeEnd", etimestr.str() ); + m_metainfo->addMetaEntry( "ExtractNode", userhost ); + m_metainfo->addMetaEntry( "InputCollections", inputCollections ); + m_metainfo->addMetaEntry( "OutputCollections", outputCollections ); + m_metainfo->addMetaEntry( "Query", (m_queryinfo.query().length() ? m_queryinfo.query() : "1=1") ); + } +} + + +/// write all metadata +void +CollAppendBase::writeMetadata( ) +{ + m_progress.print("Metadata writing", 0); + m_metainfo->writeMetadata( m_destCollections ); +} + + + +void +CollAppendBase::closeCollections( ) +{ + for( size_t i=0; i<m_dstinfo.nDst(); i++ ) { + m_destCollections[i]->commit(); + m_committed = true; + m_destCollections[i]->close(); + delete m_destCollections[i]; + } + m_destCollections.resize(0); + + for( size_t i=0; i<m_srcinfo.nSrc(); i++ ) { + m_srcCollections[i]->close(); + delete m_srcCollections[i]; + } + m_srcCollections.resize(0); +} + + + +void +CollAppendBase::finalize( ) +{ + // List individual collections (with evt counts) + cout << "CollAppend: Finished appending input collection(s): " << endl; + for( unsigned int i=0; i<m_srcinfo.nSrc(); i++ ) { + cout << i+1 << ": `" << m_srcinfo.name(i) << ":" + << m_srcinfo.type(i) << "' (" + << m_srcCountVec[i] << " events) " + << endl; + } + + // List individual output collections + cout << " to output collection(s): " << endl; + for( unsigned int i=0; i<m_dstinfo.nDst(); i++ ) { + cout << i+1 << ": `" << m_dstinfo.name(i) << ":" + << m_dstinfo.type(i) << "' " + << endl; + } + + // Note if there was a selection involved + if (m_queryinfo.query().size() > 0) cout << "Qualified by " << m_queryinfo.query() << endl; + // Give total for output + cout << endl; + cout << " ===> " << m_evtCounterTotal << " total events appended to each output" << endl; + cout << " ===> " << "Processing took " << difftime(m_endtime, m_starttime) << " seconds" << endl; + if( 
m_committed ) cout << "At least one COMMIT has occurred" << endl; +} diff --git a/Database/APR/CollectionUtilities/src/CollMetaRegistry.cpp b/Database/APR/CollectionUtilities/src/CollMetaRegistry.cpp new file mode 100644 index 0000000000000000000000000000000000000000..953116e53820cea1c7ce585a1195b7fd0caf98b6 --- /dev/null +++ b/Database/APR/CollectionUtilities/src/CollMetaRegistry.cpp @@ -0,0 +1,29 @@ +/* + Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration +*/ + +#include "CollectionUtilities/CollMetaRegistry.h" +#include "CollectionUtilities/TagMetadataKey.h" + +using namespace pool; + +CollMetaRegistry* CollMetaRegistry::m_instance = 0; + +CollMetaRegistry* CollMetaRegistry::Instance() +{ + if (m_instance == 0) { + m_instance = new CollMetaRegistry(); + } + return m_instance; +} + +CollMetaRegistry::Container::const_iterator CollMetaRegistry::seek(std::string input) +{ + TagMetadataKey tkey; + if( tkey.fromString(input) ) { + return this->find(tkey.key()); + } else { + return this->find(input); + } +} + diff --git a/Database/APR/CollectionUtilities/src/CollSplitByGUIDBase.cpp b/Database/APR/CollectionUtilities/src/CollSplitByGUIDBase.cpp new file mode 100755 index 0000000000000000000000000000000000000000..055b4c86746a76c6ad810e1d12c83a97bf1917de --- /dev/null +++ b/Database/APR/CollectionUtilities/src/CollSplitByGUIDBase.cpp @@ -0,0 +1,505 @@ +/* + Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration +*/ + +/** + * @file CollSplitByGUIDBase.cpp + * @brief Base class for the utility to list the file GUIDs used by a POOL collection and split the + * collection into sub-collections by GUID + * @author K. Karr <Kristo.Karr@cern.ch>, C. 
Nicholson <Caitriana.Nicholson@cern.ch>, Marcin.Nowak@cern.ch + * $Id: CollSplitByGUIDBase.cpp 527111 2012-11-21 21:11:08Z gemmeren $ + */ + +#include "PersistentDataModel/Token.h" + +#include "CollectionBase/CollectionService.h" +#include "CollectionBase/CollectionDescription.h" +#include "CollectionBase/CollectionRowBuffer.h" +#include "CollectionBase/TokenList.h" +#include "CollectionBase/ICollectionQuery.h" +#include "CollectionBase/ICollectionCursor.h" +#include "CollectionBase/ICollectionDataEditor.h" +#include "CollectionBase/ICollectionMetadata.h" + +#include "CoralBase/Attribute.h" +#include "CoralBase/MessageStream.h" + +#include "POOLCore/Exception.h" + +#include "CollectionUtilities/CollSplitByGUIDBase.h" + + +#include <iostream> +#include <sstream> +#include <fstream> +#include <memory> // for auto_ptr +#define AUTO_PTR auto_ptr +#include <stdexcept> + +using namespace std; +using namespace pool; + +#define corENDL coral::MessageStream::endmsg + + +bool guidPairSort( const pair<Guid, int> &p1, const pair<Guid, int> &p2){ + return p1.second > p2.second; +} + + + +CollSplitByGUIDBase::CollSplitByGUIDBase(const std::string& name) : + m_thisProgram( name ), + m_maxSplit( 500 ), + m_minEvents( -1 ), + m_rowsCached( 1000 ), + m_numEventsPerCommit( static_cast<unsigned int>(-1) ), + m_numRowsCached( 0 ), + m_collectionService( new pool::CollectionService() ), + m_log( name ), + m_argsVec( name, true, &m_log ), + m_collectionPool( 0 ), + m_outputCollSeqN( 1 ) + +{ +} + + +CollSplitByGUIDBase::~CollSplitByGUIDBase() +{ + delete m_collectionPool; +} + + + + +int +CollSplitByGUIDBase::execute( std::vector<std::string> argv_v ) +{ + // Primary try block + try { + if( init( argv_v ) ) { + //time(&m_starttime); + openSourceCollections(); + openDestCollections(); + copyRows(); + //time(&m_endtime); + copyMetadata(); + finalize(); + } + + return 0; + } + catch( pool::Exception& poolException ) + { + std::cerr << "pool::Exception: " << poolException.what() << 
std::endl;; + return 1; + } + catch( std::exception& exception ) + { + std::cerr << "std::exception: " << exception.what() << std::endl;; + return 1; + } + catch( ... ) + { + std::cerr << "Unknown exception caught in main()." << std::endl;; + return 1; + } +} + + + + +bool +CollSplitByGUIDBase::init( std::vector<std::string> argv_v ) +{ + m_argsVec.desc << m_thisProgram << " is a tool for querying an event collection, or " + << "collections, and storing the results in a number of output collections, " + << "one for each different event file GUID. " << endl + << "Currently, these sub-collections are output as LOCAL, " + << "ROOT-based collections with a fixed name and will appear in the directory from " + << "which the program was executed." << endl; + + // list of CollAppend *specific* cli keys and their argument properties + QualList markers; + markers["-guidfile"].desc << "List of GUIDs for output collections. One GUID per line. Lines starting with ':' assign collection name for GUIDS that follow"; + markers["-maxsplit"].desc << "Limit number of produced subcollections. Spillover will be stored in the last collection"; + markers["-rowscached"].desc << "Number of rows cached in memory for each output collection that is not in open collections pool. Bigger cache may speed up writing, but uses more memory. DEFAULT=" << m_rowsCached; + markers["-splitref"].desc << "Name of ref to use for boundaries of split (DEFAULT=primary ref)"; + markers["-minevents"].desc << "minimum number of events required to create a separate output collection for a particular GUID"; + markers["-nevtpercommit"].desc << "Max. 
number of events to process between " + << "output transaction commits (default is infinity)"; + markers["-nevtcached"].desc << "size of the insert buffer for bulk operations " + << "DEFAULT = 0 (no bulk operations)"; + + CmdLineArgs2 cmdLineArgs; + cmdLineArgs.setArgQuals(markers); + m_argsVec.push_back(&cmdLineArgs); // Add it to the list + + // Classes with shared cli keys and their argument properties + // Add them to the list + m_argsVec.push_back(&m_catinfo); + m_argsVec.push_back(&m_queryinfo); + m_argsVec.push_back(&m_srcinfo); + m_argsVec.push_back(&m_metainfo); + + // Check that all cmd line args are valid + if( !m_argsVec.evalArgs(argv_v) ) return false; + if( !m_argsVec.checkValid() ) return false; + + cmdLineArgs.getOpt("-splitref", m_splitRef); + cmdLineArgs.getOpt("-minevents", m_minEvents); + cmdLineArgs.getOpt("-rowscached", m_rowsCached); + cmdLineArgs.getOpt("-maxsplit", m_maxSplit); + if( m_maxSplit < 1 ) m_maxSplit = 1; + string guidfile; + if( cmdLineArgs.getOpt("-guidfile", guidfile) ) { + if( !readGuidList(guidfile) ) { + exit( -5 ); + } + } + // --- + // EXPERT OPTIONS + // For tuning purposes the number of events between commits can be specified + // --- + cmdLineArgs.getOpt("-nevtpercommit", m_numEventsPerCommit); + cmdLineArgs.getOpt("-nevtcached", m_numRowsCached); + + m_catinfo.setCatalogs( m_collectionService ); + if( !m_collectionPool ) m_collectionPool = new CollectionPool(50, m_rowsCached); + + return true; +} + + + +void +CollSplitByGUIDBase::openSourceCollections() +{ + for( unsigned int i=0; i<m_srcinfo.nSrc(); i++ ) { + m_log << coral::Info + << "Opening source collection " << m_srcinfo.name(i) + << " of type " << m_srcinfo.type(i) << coral::MessageStream::endmsg; + bool readOnly( true ); + pool::ICollection *collection = m_collectionService->handle( m_srcinfo.name(i), m_srcinfo.type(i), m_srcinfo.connect(), readOnly ); + m_srcCollections.push_back( collection ); + + if( (m_queryinfo.query() != "" || m_queryinfo.queryOptions() 
!= "") + && !collection->description().equals( m_srcCollections[0]->description() ) ) { + m_log << coral::Warning << " The schemas of one or more " + << "input collections are different and a query has been " + << "requested. This may lead to unexpected behavior." + << coral::MessageStream::endmsg; + } + } +} + + + +void +CollSplitByGUIDBase::openDestCollections() +{ + std::map< Guid, int > eventsPerGuid; + std::multimap< std::string, std::string> invCollMap; // map to keep sub-collection name --> guid + + for( unsigned int i=0; i<m_srcCollections.size(); i++ ) + { + ICollection* collection = m_srcCollections[i]; + pool::ICollectionQuery *collQuery = collection->newQuery(); + collQuery->setCondition( m_queryinfo.query() ); + if( m_queryinfo.queryOptions().size() ) { + collQuery->addToOutputList( m_queryinfo.queryOptions() ); + } else { + collQuery->selectAll(); + } + + m_log << coral::Info << "Executing query for the source collection" << coral::MessageStream::endmsg; + pool::ICollectionCursor& cursor = collQuery->execute(); + + // set parameters for the sub-collections. + // currently caters only for writing local, Root-based collections. + std::string subCollType = "RootCollection"; + std::string subCollConnect = ""; + + // first loop: for each event, find the GUID / fileId and + // count how many events match that GUID + int totalEvents = 0; + int uniqueGuids = 0; + + // Token name to split on (if not specified, use default for each source coll) + string refname = ( m_splitRef.size()? 
m_splitRef : collection->description().eventReferenceColumnName() ); + while( cursor.next() ) { + const pool::TokenList &tokens = cursor.currentRow().tokenList(); + for( pool::TokenList::const_iterator iter = tokens.begin(); iter != tokens.end(); ++iter ) { + if( iter.tokenName() == refname ) { + Guid guid = iter->dbID(); + if( eventsPerGuid.find( guid ) == eventsPerGuid.end() ) { + // new unique GUID found + eventsPerGuid[ guid ] = 1; + uniqueGuids++; + } + else { + eventsPerGuid[ guid ]++; + } + } + } + totalEvents++; + } + m_log << coral::Info << "Collection " << collection->description().name() << " has " << totalEvents + << " entries with " << uniqueGuids << " unique file GUIDs in Token " << refname + << coral::MessageStream::endmsg; + + for( map<Guid, int>::iterator guidIter1 = eventsPerGuid.begin(), + end = eventsPerGuid.end(); guidIter1 != end; ++guidIter1 ) + { + m_log << coral::Debug << "GUID: " << guidIter1->first << ", rows: " + << guidIter1->second << corENDL; + } + //-------- make suitable output collections ( each with nEvents >= m_minEvents ) + int createdCollections = 0; + + vector<pair<Guid, int> > sortedGuids; + map<string, ICollection*> collMap; + for( map<Guid, int>::iterator guidIter1 = eventsPerGuid.begin(), + end = eventsPerGuid.end(); guidIter1 != end; ++guidIter1 ) + { + string guid = guidIter1->first.toString(); + string subCollName = collectionNameForGuid( guid ); + if( !subCollName.empty() ) + { + // process guid from a list + pool::ICollection* subCollection = 0; + if( collMap.find( subCollName ) == collMap.end() ) + { + // create a new collection + pool::CollectionDescription newDestDesc( collection->description() ); + newDestDesc.setName( subCollName ); + newDestDesc.setType( subCollType ); + newDestDesc.setConnection( subCollConnect ); + subCollection = m_collectionService->create( newDestDesc, true ); + createdCollections++; + subCollection->dataEditor().setRowCacheSize( 0 ); + collMap[ subCollName ] = subCollection; + } + else + { 
+ // find an already created collection + subCollection = collMap[ subCollName ]; + } + // map to appropriate GUID + m_collectionPool->addCollection( guid, subCollection ); + invCollMap.insert( std::pair<std::string, std::string>( subCollName, guid ) ); + } + else + { + // guid not from the list, keep it for default processing + sortedGuids.push_back( *guidIter1 ); + } + } + // sort the remaining GUIDs by cardinality + sort( sortedGuids.begin(), sortedGuids.end(), guidPairSort ); + + int rowCounter = 0; + unsigned guidCounter = 0; + pool::ICollection* subCollection = 0; + string subCollName; + vector<pair<Guid, int> >::iterator guidIter2 = sortedGuids.begin(); + while( guidIter2 != sortedGuids.end() ) + { + guidCounter++; + std::string guid = guidIter2->first.toString(); + int thisNumEvents = guidIter2->second; + bool collLimit = ( createdCollections >= m_maxSplit ); + + // create a new output collection if + if( !subCollection // there is no collection yet or + || ( rowCounter >= m_minEvents // enough events were written to the previous one + && !collLimit ) ) // but we are not over the collection limit + { + // create a new sub-collection + subCollName = generateNextCollName(); + pool::CollectionDescription newDestDesc( collection->description() ); + newDestDesc.setName( subCollName ); + newDestDesc.setType( subCollType ); + newDestDesc.setConnection( subCollConnect ); + + subCollection = m_collectionService->create( newDestDesc, true ); + subCollection->dataEditor().setRowCacheSize( 0 ); + createdCollections++; + rowCounter = 0; + } + // map to appropriate GUID + m_collectionPool->addCollection( guid, subCollection ); + invCollMap.insert( std::pair<std::string, std::string>(subCollName, guid)); + rowCounter += thisNumEvents; + ++guidIter2; + } + + std::string lastCollName = ""; + for( std::multimap<std::string, std::string>::const_iterator invCollIter = invCollMap.begin(); + invCollIter != invCollMap.end(); + ++invCollIter) + { + std::string thisCollName = 
invCollIter->first; + if (thisCollName == lastCollName) + m_log << coral::Info << " " << invCollIter->second; + else + { + if (invCollIter != invCollMap.begin()) + m_log << coral::Info << coral::MessageStream::endmsg; + m_log << coral::Info << "Created new sub-collection " << thisCollName << " with files:\t" << invCollIter->second; + } + lastCollName = thisCollName; + } + m_log << coral::Info << coral::MessageStream::endmsg; + } +} + + + +void +CollSplitByGUIDBase::copyRows() +{ + for( unsigned int i=0; i<m_srcCollections.size(); i++ ) + { + ICollection* collection = m_srcCollections[i]; + auto_ptr<ICollectionQuery> collQuery( collection->newQuery() ); + collQuery->setCondition( m_queryinfo.query() ); + if( m_queryinfo.queryOptions().size() ) { + collQuery->addToOutputList( m_queryinfo.queryOptions() ); + } else { + collQuery->selectAll(); + } + pool::ICollectionCursor& cursor = collQuery->execute(); + + const std::string srcRefName = collection->description().eventReferenceColumnName(); + const std::string dstRefName = m_collectionPool->getDstRefName(); + // Token name to split on (if not specified, use default for each source coll) + const std::string splitRefName = ( m_splitRef.size()? 
m_splitRef : srcRefName ); + size_t row = 0; + while( cursor.next() ) { + row ++; + // get file GUID from event + const TokenList &tokens = cursor.currentRow().tokenList(); + string guid = tokens[ splitRefName ].dbID().toString(); + CollectionRowBuffer &rowBuffer = m_collectionPool->getRowBuffer( guid ); + // copy all attributes + rowBuffer.attributeList() = cursor.currentRow().attributeList(); + // copy the tokens + for( TokenList::const_iterator ti = tokens.begin(); ti != tokens.end(); ++ti ) { + if( ti.tokenName() == srcRefName ) { + ti->setData( &rowBuffer.tokenList()[ dstRefName ] ); + } else { + ti->setData( &rowBuffer.tokenList()[ ti.tokenName() ] ); + } + } + m_collectionPool->insertRow( guid, rowBuffer ); + } + m_log << coral::Info << "Finished writing all events from input collection " << collection->description().name() << coral::MessageStream::endmsg; + } +} + + +// Example of a copy metadata methos +// this one does nothing very useful +void +CollSplitByGUIDBase::copyMetadata() +{ + CollectionPool::flushing_iterator i( m_collectionPool ); + i.forceOpen(); // ensures all accessed collections are open + while( i.isValid() ) { + i->metadata().setValueForKey( "CreatedBy", m_thisProgram ); + ++i; + } +} + + + +void +CollSplitByGUIDBase::finalize() +{ + for( unsigned int i=0; i<m_srcCollections.size(); i++ ) { + m_srcCollections[i]->close(); + } + delete m_collectionPool; // this will close output collections + m_collectionPool = 0; +} + + +/// use a different collection pool handler than the default one +void +CollSplitByGUIDBase::setCollectionPool( CollectionPool* cp ) +{ + if( !cp ) return; + if( m_collectionPool ) delete m_collectionPool; + m_collectionPool = cp; +} + + + +bool +CollSplitByGUIDBase::readGuidList( const std::string& filename ) +{ + ifstream file( filename.c_str() ); + const int linelen = 1000; + char line[linelen]; + + string collectionName; + while( file.good() ) { + file.getline(line, linelen); + char *p = line; + while( *p == ' ' || *p 
== '\t' ) p++; + if( *p == 0 || *p == '#' ) + continue; + if( *p == ':' ) { + char *q = ++p; + while( *q != 0 ) q++; + while( *(q-1) == ' ' || *(q-1) == '\t' ) q--; + collectionName = string(p,q); + if( collectionName.empty() ) { + // need to generate a default output collection name here, so the GUIDs are properly grouped + collectionName = generateNextCollName(); + } + continue; + } + char *q = p; + while( *q != 0 ) q++; + while( *(q-1) == ' ' || *(q-1) == '\t' ) q--; + string guid = string(p,q); + CollNameforGuidMap[ guid ] = collectionName; + } + if( file.bad() ) { + cerr << "Warning: problem reading input file <" << filename + << "> " << endl; + file.close(); + return false; + } + + file.close(); + return true; +} + + +/* + string guid: GUID to look up in the user-specified GUID list +*/ +std::string +CollSplitByGUIDBase::collectionNameForGuid( const std::string& guid ) +{ + std::map<std::string,std::string>::const_iterator i = CollNameforGuidMap.find( guid ); + if( i != CollNameforGuidMap.end() ) { + return i->second; + } + return ""; +} + + +// generate next output collection name +std::string +CollSplitByGUIDBase::generateNextCollName( ) +{ + std::stringstream collectionNameStream; + collectionNameStream << "sub_collection_" << m_outputCollSeqN++; + return collectionNameStream.str(); +} + diff --git a/Database/APR/CollectionUtilities/src/CollectionMetadataParser.cpp b/Database/APR/CollectionUtilities/src/CollectionMetadataParser.cpp new file mode 100755 index 0000000000000000000000000000000000000000..970b88f729dd14f34287f41c644d3900dd649dfa --- /dev/null +++ b/Database/APR/CollectionUtilities/src/CollectionMetadataParser.cpp @@ -0,0 +1,226 @@ +/* + Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration +*/ + +#include <boost/lexical_cast.hpp> +#include "POOLCore/boost_tokenizer_headers.h" +#include "CollectionUtilities/CollectionMetadataParser.h" +#include <algorithm> +#include <iostream> + +using namespace std; +using namespace pool; + 
+//help icc navigate pairs minefield, and speed up things a touch +const std::string SELFIE("SELF"); + +CollectionMetadataParser::CollectionMetadataParser() : m_numLevels(0) {} + +CollectionMetadataParser::CollectionMetadataParser(pool::ICollectionMetadata& cmd) : + m_numLevels(0) +{ + bool stat = this->init(cmd); + if (!stat) std::cout << "Unable to initialize parser" << std::endl; + else std::cout << "Parser initialized" << std::endl; +} + +bool CollectionMetadataParser::init(pool::ICollectionMetadata& cmd) +{ + cout << "CollectionMetadataParser: reading metadata..." << endl; + + // first clear all the containers + m_levelList.clear(); + m_tree.clear(); + //m_nodes.clear(); + m_nodeconns.clear(); + m_nodevalues.clear(); + m_keyvalues.clear(); + + bool status(true); + pool::ICollectionMetadata::const_iterator cmd_it = cmd.begin(); + + //std::cout << "About to loop over metadata in parser init" << std::endl; + while (cmd_it != cmd.end()) { + std::string concatkey = cmd_it.key(); + std::string value = cmd_it.value(); + + // Now parse the key + boost::char_separator<char> sep("|"); + boost::tokenizer<boost::char_separator<char> > tokens(concatkey,sep); + int size = std::distance(tokens.begin(), tokens.end()); + //std::cout << "The num elements in " << concatkey << " is " << size << std::endl; + status = (size > 1); + if (status) { + boost::tokenizer<boost::char_separator<char> >::iterator it = tokens.begin(); + boost::tokenizer<boost::char_separator<char> >::iterator key_it = tokens.begin(); + for (int i=0; i<size-1; ++i) ++key_it; + std::string key = boost::lexical_cast<std::string>(*key_it); + ProvNode child(SELFIE); + ProvNode parent; + //std::cout << "Looking for parents" << std::endl; + while (it != key_it) { + //m_nodes.insert(*it); + parent = boost::lexical_cast<std::string>(*it); + // fill node connection vector ROOT->first->second->...->LEAF + m_nodeconns.insert(std::make_pair(parent,child)); + child = parent; + ++it; + } // tokenizer + std::string source = 
child; + m_nodevalues.insert( + std::make_pair(source, + std::make_pair(key,value))); + m_keyvalues.insert( + std::make_pair(key, + std::make_pair(source,value))); + } + else if (size==1) { + m_nodeconns.insert(std::make_pair(SELFIE,SELFIE)); + std::string source = SELFIE; + m_nodevalues.insert( + std::make_pair(source, + std::make_pair(concatkey,value))); + m_keyvalues.insert( + std::make_pair(concatkey, + std::make_pair(source,value))); + } + ++cmd_it; + } // ICollectionMetadataIterator + cout << "CollectionMetadataParser: parsing..." << endl; + this->buildTree(); + return status; +} + +bool CollectionMetadataParser::hasSelfData() const +{ + std::vector<ProvNode> Level = getParents(ProvNode(SELFIE));; + std::vector<ProvNode>::const_iterator self_it= std::find(Level.begin(),Level.end(),SELFIE); + return (self_it != Level.end()); +} + +std::set<ProvNode> +CollectionMetadataParser::listNodes() const +{ + //return m_nodes; + std::set<ProvNode> nodes; + for (int i=0; i<m_numLevels; ++i) { + std::set<ProvNode> temps = this->listNodes(i); + std::set<ProvNode>::const_iterator tempit = temps.begin(); + while (tempit != temps.end() ) { + nodes.insert(*tempit); + ++tempit; + } + } + return nodes; +} + +std::set<ProvNode> +CollectionMetadataParser::listNodes(int level) const +{ + std::set<ProvNode> nodes; + for (std::multimap<int,ProvNode>::const_iterator it = m_levelList.lower_bound(level); + it != m_levelList.upper_bound(level); + ++it) { + nodes.insert(it->second); + } + return nodes; +} + +std::vector<ProvNode> +CollectionMetadataParser::getChildren(const ProvNode& p) const +{ + std::vector<ProvNode> children; + std::set< std::pair<ProvNode,ProvNode> >::const_iterator it = m_nodeconns.begin(); + while (it != m_nodeconns.end() ) { + if (it->first==p) children.push_back(it->second); + ++it; + } + return children; +} + +std::vector<ProvNode> +CollectionMetadataParser::getParents (const ProvNode& c) const +{ + std::vector<ProvNode> parents; + std::set< 
std::pair<ProvNode,ProvNode> >::const_iterator it = m_nodeconns.begin(); + while (it != m_nodeconns.end() ) { + if (it->second==c) parents.push_back(it->first); + ++it; + } + return parents; +} + +std::map<ProvNode, std::vector<ProvNode> > CollectionMetadataParser::getTree() const +{ + return m_tree; +} + +void CollectionMetadataParser::buildTree() +{ + std::vector<ProvNode> currentLevel = getParents(ProvNode(SELFIE));; + std::vector<ProvNode> nextLevel; + m_numLevels = 0; + std::vector<ProvNode> selfvec; selfvec.push_back(ProvNode(SELFIE)); + //if (hasSelfData()) { + m_levelList.insert(std::make_pair(m_numLevels,SELFIE)); + m_tree.insert(std::make_pair(SELFIE,selfvec)); + //} + m_numLevels++; + while (currentLevel.size()>0) { + //std::cout << "for numLevel = " << m_numLevels << ", processing currentLevel of size = " << currentLevel.size() << std::endl; + nextLevel.clear(); + std::vector<ProvNode>::const_iterator it = currentLevel.begin(); + while (it!=currentLevel.end()) { + if (*it!=SELFIE) { + m_levelList.insert(std::make_pair(m_numLevels,*it)); + std::vector<ProvNode> parents = getParents(*it); + m_tree.insert(std::make_pair(*it,parents)); + std::copy(parents.begin(),parents.end(),back_inserter(nextLevel)); + } + ++it; + } + currentLevel = nextLevel; + m_numLevels++; + } + std::cout << "Provenance tree has " << m_numLevels << " levels" << std::endl; +} + +std::set< std::pair<MetaKey,MetaValue> > +CollectionMetadataParser::getUniqueMetaForNode(const ProvNode& node) const +{ + std::set< std::pair<MetaKey,MetaValue> > keyAndValue; + std::multimap<ProvNode,std::pair<MetaKey,MetaValue> >::const_iterator last = m_nodevalues.upper_bound(node); + std::multimap<ProvNode,std::pair<MetaKey,MetaValue> >::const_iterator first = m_nodevalues.lower_bound(node); + while (first!=last) { + keyAndValue.insert(first->second); + ++first; + } + return keyAndValue; +} + +std::vector< std::pair<MetaKey,MetaValue> > +CollectionMetadataParser::getMetaForNode(const ProvNode& node) const 
+{ + std::vector< std::pair<MetaKey,MetaValue> > keyAndValue; + std::multimap<ProvNode,std::pair<MetaKey,MetaValue> >::const_iterator last = m_nodevalues.upper_bound(node); + std::multimap<ProvNode,std::pair<MetaKey,MetaValue> >::const_iterator first = m_nodevalues.lower_bound(node); + while (first!=last) { + keyAndValue.push_back(first->second); + ++first; + } + return keyAndValue; +} + +std::vector< std::pair<ProvNode,MetaValue> > +CollectionMetadataParser::getMetaForKey(const MetaKey& key) const +{ + std::vector< std::pair<ProvNode,std::string> > nodeAndValue; + std::multimap<MetaKey,std::pair<ProvNode,MetaValue> >::const_iterator last = m_keyvalues.upper_bound(key); + std::multimap<MetaKey,std::pair<ProvNode,MetaValue> >::const_iterator first = m_keyvalues.lower_bound(key); + while (first!=last) { + nodeAndValue.push_back(first->second); + ++first; + } + return nodeAndValue; +} + diff --git a/Database/APR/CollectionUtilities/src/CollectionPool.cpp b/Database/APR/CollectionUtilities/src/CollectionPool.cpp new file mode 100755 index 0000000000000000000000000000000000000000..a56bd62d888ec79e2a8b6418dc581e6098812cf4 --- /dev/null +++ b/Database/APR/CollectionUtilities/src/CollectionPool.cpp @@ -0,0 +1,142 @@ +/* + Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration +*/ + +#include "CollectionUtilities/CollectionPool.h" +#include "CollectionBase/ICollectionDataEditor.h" +#include "CollectionBase/CollectionDescription.h" + +#include <stdexcept> + +using namespace pool; +using namespace std; + + +CollectionPool::CollectionPool( unsigned maxOpen, unsigned cacheSize) { + m_maxOpen = ( maxOpen>1? 
maxOpen : 2 ); + m_rowCacheSize = cacheSize; +} + +CollectionPool::~CollectionPool() { + flushing_iterator i( this ); + while( i.isValid() ) { + ICollection *coll = &*i; + ++i; + // order important - delete after moving to the next + delete coll; + } +} + + +void CollectionPool::addCollection( const std::string& guid, ICollection* coll ) { + if( m_map.find(guid) != m_map.end() ) + throw std::runtime_error("Attempt to overwrite GUID in collections map"); + m_map[guid] = coll; + if( m_rowCache.find(coll) == m_rowCache.end() ) { + // new collection + m_rowCache[ coll ] = rowVect_t(); + m_rowCache[ coll ].reserve( m_rowCacheSize/2+1); + if( coll->isOpen() ) { + queueOpenColl( coll ); + } + } +} + + +ICollection* CollectionPool::get( const std::string& guid ) { + ICollection* coll = m_map[guid]; + if( !coll->isOpen() ) { + coll->open(); + queueOpenColl( coll ); + } + return coll; +} + +void CollectionPool::insertRow( const std::string& guid , const pool::CollectionRowBuffer& row ) { + ICollection* coll = m_map[guid]; + if( coll->isOpen() ) { + coll->dataEditor().insertRow( row ); + } else { + rowVect_t &rowVect = m_rowCache[ coll ]; + rowVect.push_back( row ); + if( rowVect.size() >= m_rowCacheSize ) { + coll->open(); + writeCache( coll ); + queueOpenColl( coll ); + } + } +} + +pool::CollectionRowBuffer& CollectionPool::getRowBuffer( const std::string& ) { + return m_rowBuffer; +} + +const std::string& CollectionPool::getDstRefName() { + return m_dstRefName; +} + + +void CollectionPool::queueOpenColl( pool::ICollection* coll ) { + if( m_queue.empty() ) { + // first open collection in - store a rowBuffer copy + m_rowBuffer = coll->dataEditor().rowBuffer(); + m_dstRefName = coll->description().eventReferenceColumnName(); + } + if( m_queue.size() >= (size_t)m_maxOpen ) { + reduceQueue(); + } + m_queue.push( coll ); +} + +void CollectionPool::reduceQueue() { + ICollection *coll = m_queue.front(); + m_queue.pop(); + writeCache( coll ); + coll->commit(); + coll->close(); +} + 
+void CollectionPool::writeCache( pool::ICollection* coll ) { + rowVect_t &rowVect = m_rowCache[ coll ]; + for( rowVect_t::const_iterator ri = rowVect.begin(), rend = rowVect.end(); + ri != rend; ++ri ) { + coll->dataEditor().insertRow( *ri ); + } + rowVect.clear(); +} + + + + + +// NOTE - the current collection can NOT be deleted before calling ++ +CollectionPool::flushing_iterator& +CollectionPool::flushing_iterator::operator++() +{ + ICollection *coll = m_iter->first; + // reopen to trigger writing out of cached rows + // NOTE - this is done >during< moving to the next collection + if( !coll->isOpen() && m_iter->second.size() ) { + coll->open(); + } + if( coll->isOpen() ) { + m_cpool->writeCache( coll ); + coll->commit(); + coll->close(); + } + ++m_iter; + return *this; +} + + +ICollection* CollectionPool::flushing_iterator::operator->() +{ + ICollection *coll = m_iter->first; + if( !coll->isOpen() && m_forceOpen ) { + coll->open(); + } + return coll; +} + + + diff --git a/Database/APR/CollectionUtilities/src/GenericMetaHandler.cpp b/Database/APR/CollectionUtilities/src/GenericMetaHandler.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e0e0f210f0557dee9e2b7892ab1b26ed918ce327 --- /dev/null +++ b/Database/APR/CollectionUtilities/src/GenericMetaHandler.cpp @@ -0,0 +1,48 @@ +/* + Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration +*/ + +#include "CollectionUtilities/GenericMetaHandler.h" +#include <iostream> + +using namespace std; + +void pool::GenericMetaHandler::process(const pool::ICollMetaHandler::CopyMode& mode, + MetaValue& value, + pool::ICollMetaHandler::CollectionMetadata& out, + const ProvNode& prefix, + MetaKey& key) +{ + std::string outval; + pool::ICollMetaHandler::CollectionMetadata::const_iterator mit = out.find(key); + if (mit!=out.end()) outval=mit->second; + if (mode==ICollMetaHandler::merge) this->Merge (key,value,outval,prefix); + else if (mode==ICollMetaHandler::extract) 
this->Extract(key,value,outval,prefix); + else if (mode==ICollMetaHandler::copy) this->Copy (value,outval); + else { + std::cerr << "ERROR, INVALID COPY MODE" << mode << std::endl; + } + if (mit!=out.end()) out[key] = outval; + else out.insert(std::make_pair(key,outval)); +} + + +void pool::GenericMetaHandler::Merge (MetaKey& key, MetaValue& value, MetaValue& out, const ProvNode& prefix) +{ + // for merge always prepend provnode to key even for self + key = prefix + "|" + key; + out = value; +} + +void pool::GenericMetaHandler::Extract(MetaKey& key, MetaValue& value, MetaValue& out, const ProvNode& prefix) +{ + // for extract prepend provnode to key if not SELF + if (prefix != "SELF") key = prefix + "|" + key; + out = value; +} + +void pool::GenericMetaHandler::Copy (MetaValue& value, MetaValue& out) +{ + // for copy leave values unchanged + out = value; +} diff --git a/Database/APR/CollectionUtilities/src/MaxEventsInfo.cpp b/Database/APR/CollectionUtilities/src/MaxEventsInfo.cpp new file mode 100644 index 0000000000000000000000000000000000000000..3eaed2c5722049d5131ac06fbe1c3c65364c3fff --- /dev/null +++ b/Database/APR/CollectionUtilities/src/MaxEventsInfo.cpp @@ -0,0 +1,39 @@ +/* + Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration +*/ + + +#include "CollectionUtilities/MaxEventsInfo.h" + +#include <iostream> + +#define corENDL coral::MessageStream::endmsg + +using namespace std; +using namespace pool; + + + +MaxEventsInfo::MaxEventsInfo( ) : + CmdLineArgs2("MaxEventsInfo"), + m_specified(false), + m_maxEvents(-1) +{ + getArgQual("-nevents").desc << "finish after processing N events"; +} + + +bool +MaxEventsInfo::evalArgs(std::vector<std::string>& argv) +{ + if( !CmdLineArgs2::evalArgs(argv) ) return false; + + string nevents; + if( getOpt("-nevents", nevents) ) { + m_specified = true; + istringstream ne_str( nevents ); + ne_str >> m_maxEvents; + } + return true; +} + diff --git a/Database/APR/CollectionUtilities/src/MetaInfo.cpp 
b/Database/APR/CollectionUtilities/src/MetaInfo.cpp
new file mode 100755
index 0000000000000000000000000000000000000000..b4144e223cca52a785d17f99ae83f636f1e328e4
--- /dev/null
+++ b/Database/APR/CollectionUtilities/src/MetaInfo.cpp
@@ -0,0 +1,321 @@
+/*
+  Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
+*/
+
+#include "CollectionBase/ICollection.h"
+#include "CollectionBase/ICollectionDescription.h"
+#include "CollectionBase/ICollectionMetadata.h"
+#include "CollectionBase/CollectionBaseNames.h"
+#include "CoralBase/MessageStream.h"
+
+#include <algorithm>
+#include <iostream>
+
+#include "CollectionUtilities/MetaInfo.h"
+#include "CollectionUtilities/GenericMetaHandler.h"
+#include "CollectionUtilities/CollMetaRegistry.h"
+
+/**
+ * @class MetaInfo
+ *
+ * @brief MetaInfo is a concretization of a CmdLineArgs2
+ * to process metadata-related arguments
+ * Now also deals with metadata copying
+ */
+
+#define corENDL coral::MessageStream::endmsg
+
+using namespace std;
+using namespace pool;
+
+/**
+ * Registers the metadata-related qualifiers (-metadata, -copy, -extract,
+ * -merge, -nometadata, -metaopt) together with their help texts.
+ * Initial state: input metadata is not copied (m_noMetadata == true) and the
+ * copy mode is "merge". Allocates the generic handler released by the destructor.
+ */
+MetaInfo::MetaInfo( const std::string& logname ) :
+      CmdLineArgs2( logname )
+      , m_noMetadata( true )
+      , m_copyMode( ICollMetaHandler::merge )
+      , m_log( logname )
+{
+   m_markers.insert( std::make_pair("-metadata", ArgQual(2,true,false)) );
+   m_markers.insert( std::make_pair("-copy", ArgQual(0)) );
+   m_markers.insert( std::make_pair("-extract", ArgQual(0)) );
+   m_markers.insert( std::make_pair("-merge", ArgQual(0)) );
+   m_markers.insert( std::make_pair("-nometadata", ArgQual(0)) );
+   m_markers.insert( std::make_pair("-metaopt", ArgQual(1,true,false)) );
+   m_markers["-metadata"].desc << "<metadata key> <metadata value> " << std::endl
+                               << "List of space separated key-value pairs of metadata entries "
+                               << "that will be added to the output collection(s). "
+                               << "Keys are automatically wildcarded, so key='DOH' "
+                               << "includes '*DOH*' key entries. DEFAULT=keepAll";
+   m_markers["-copy"].desc << "Make exact copy of 1 source collection ";
+   m_markers["-extract"].desc << "Combine N source collections, prepend existing metadata keys "
+                              << "with source collection name. "
+                              << "Add new metadata without prefix, include metadata for: "
+                              << "InputCollections, OutputCollections, Query, ExtractNode, TimeBegin, TimeEnd. "
+                              << "(useful for creating standalone destination collections)";
+   m_markers["-merge"].desc << "Combine N source collections, prepend existing metadata keys "
+                            << "with source collection name. "
+                            << "Add user supplied metadata with source collection name prefix. "
+                            << "(useful for appending to exiting destination collections). This is the DEFAULT. ";
+   m_markers["-nometadata"].desc << "Ignore metadata. "
+                                 << "(will still accept new metadata from the command line -metadata option)"
+                                 << "Same as -metaopt 'NONE' (DEPRECATED)";
+   m_markers["-metaopt"].desc << "Space-separated list of input metadata keys to retain in output. \n"
+                              << "DEFAULT='NONE' (same as -nometadata), keyword 'ALL' also available.";
+
+   setArgQuals(m_markers);
+   m_genericHandler = new GenericMetaHandler();
+}
+
+
+/// Releases the generic handler allocated in the constructor.
+MetaInfo::~MetaInfo()
+{
+   delete m_genericHandler;
+}
+
+
+
+/**
+ * Evaluates the metadata-related command line options.
+ *
+ *  - "-metadata k v [k v ...]" fills m_cmdlineMetadata; a later duplicate key
+ *    overwrites an earlier one.
+ *  - "-copy" / "-extract" / "-merge" select the copy mode; when several are
+ *    given the last one tested here (merge, then extract, then copy) wins.
+ *  - "-metaopt" selects which input keys are retained: ALL, NONE, or a list of
+ *    keys each holding at most one '*' wildcard.
+ *  - "-nometadata" is only consulted when "-metaopt" is absent.
+ *
+ * @return false if the base-class evaluation fails or "-metadata" was given an
+ *         odd number of arguments, true otherwise
+ */
+bool
+MetaInfo::evalArgs(std::vector<std::string>& argv)
+{
+   if( !CmdLineArgs2::evalArgs(argv) )
+      return false;
+
+   vector<string> newmetadata;
+   if( getOpts("-metadata", newmetadata) ) {
+      // Entries come as <key> <value> pairs. A dangling key would make the
+      // value lookup below read past the end of the vector, so reject it.
+      if( newmetadata.size() % 2 != 0 ) {
+         m_log << coral::Error << "-metadata expects <key> <value> pairs, but "
+               << newmetadata.size() << " arguments were supplied" << corENDL;
+         return false;
+      }
+      for( size_t i = 0; i + 1 < newmetadata.size(); i+=2 ) {
+         m_cmdlineMetadata[ newmetadata[i] ] = newmetadata[i+1];
+      }
+   }
+
+   if( this->hasQual("-copy") ) {
+      m_copyMode = ICollMetaHandler::copy;
+   }
+   if ( this->hasQual("-extract")) {
+      m_copyMode = ICollMetaHandler::extract;
+   }
+   if ( this->hasQual("-merge")) {
+      m_copyMode = ICollMetaHandler::merge;
+   }
+
+   vector<string> keepkeys;
+   if( getOpts("-metaopt", keepkeys) ) {
+      // An empty list is treated like NONE instead of indexing keepkeys[0]
+      if ( keepkeys.empty() || keepkeys[0] == "NONE" ) m_noMetadata=true;
+      else if ( keepkeys[0] == "ALL" ) m_noMetadata=false;
+      else {
+         m_noMetadata=true;
+         for( size_t i = 0; i < keepkeys.size() ; ++i) {
+            // Check for any wildcarding
+            std::string::size_type sep = keepkeys[i].find("*");
+            if (sep == std::string::npos) {
+               //std::cerr << "Keeping " << keepkeys[i] << std::endl;
+               m_wildkeepers.push_back(std::make_pair(keepkeys[i],""));
+            }
+            else {
+               // std::cerr << "Found a wildcard" << std::endl;
+               // split at the wildcard: text before it / text after it
+               std::pair<std::string,std::string> key;
+               key.first = keepkeys[i].substr(0,sep);
+               key.second = keepkeys[i].substr(sep+1,keepkeys[i].size());
+               if (key.first.find("*")==std::string::npos &&
+                   key.second.find("*")==std::string::npos) m_wildkeepers.push_back(key);
+               else std::cerr << "Multiple wildcards not supported, disregarding " << keepkeys[i] << std::endl;
+            }
+         }
+      }
+   }
+
+   else if ( this->hasQual("-nometadata")) {
+      m_noMetadata = true;
+   }
+
+   return true;
+}
+
+
+/*
+void
+MetaInfo::addMetaEntry(std::string key, std::string val)
+{
+   // overwrites existing key values (by design)
+   // In case this is not good, we need
to expand API
+   m_metadata[ key ] = val;
+}
+*/
+
+
+
+// Decides whether an input metadata key is carried over to the output.
+// Returns true for every key when m_noMetadata is false. Otherwise the key is
+// kept only if it matches at least one m_wildkeepers entry: both fragments of
+// the entry when both are non-empty, else the single non-empty fragment.
+// Matching is a plain substring search, so neither the position nor the order
+// of the fragments inside the key is checked.
+bool MetaInfo::shouldCopy(string key)
+{
+   if( !m_noMetadata )
+      return true;
+
+   bool keep=false;
+   if( m_wildkeepers.size() ) {
+      for (unsigned int i=0; i < m_wildkeepers.size(); ++i) {
+         std::pair<std::string,std::string> test = m_wildkeepers[i];
+         if (!test.first.empty() && test.second.size()) {
+            //std::cout << "Look " << test.first << " && " << test.second << " in " << key << std::endl;
+            if (key.find(test.first) != std::string::npos &&
+                key.find(test.second) != std::string::npos) keep=true;
+         }
+         else if (!test.first.empty()) {
+            //std::cout << "Look " << test.first << " in " << key << std::endl;
+            if (key.find(test.first) != std::string::npos) keep=true;
+         }
+         else if (!test.second.empty()) {
+            //std::cout << "Look " << test.second << " in " << key << std::endl;
+            if (key.find(test.second) != std::string::npos) keep=true;
+         }
+      }
+   }
+   return keep;
+}
+
+
+// Checks that no destination collection already holds metadata keys of the form
+// "<source name>|...". Only performed in merge mode with metadata copying
+// enabled; in every other configuration it returns true immediately.
+// The source name is the collection description name unless the source metadata
+// has a "Name" entry, which then takes precedence.
+// Returns false, after logging a warning, at the first conflict found.
+bool
+MetaInfo::checkMetadata( std::vector<pool::ICollection*> inputs,
+                         std::vector<pool::ICollection*> outputs )
+{
+   if( m_noMetadata || m_copyMode != ICollMetaHandler::merge ) {
+      return true;
+   }
+   for( vector<ICollection*>::const_iterator input = inputs.begin(),
+           end = inputs.end(); input != end; ++input )
+   {
+      ICollection* const &srcColl = *input;
+      ICollectionMetadata &srcMeta = srcColl->metadata();
+      string srcCollName = srcColl->description().name();
+      if( srcMeta.existsKey("Name") ) {
+         srcCollName = srcMeta.getValueForKey("Name");
+         m_log << coral::Debug << "Input collection "
+               << srcColl->description().name()
+               << " has internal metadata Name=" << srcCollName << corENDL;
+      }
+
+      long long sm_n = srcMeta.entries();
+      for( vector<ICollection*>::const_iterator output = outputs.begin(),
+              end = outputs.end(); output != end; ++output )
+      {
+         ICollection* const &destColl = *output;
+         ICollectionMetadata &dstMeta = destColl->metadata();
+         long long dm_n = dstMeta.entries();
+
+         bool exists = false;
+         // Two lookup strategies: when the destination holds more than 10000
+         // times as many entries as the source, probe each source key
+         // individually; otherwise scan the destination keys for the prefix.
+         if( sm_n * 10000 < dm_n ) {
+            m_log << coral::Info << "Destination collection metadata has " << dm_n << " entries. "
+                  << " Checking for KEY conflicts with single queries." << corENDL;
+            for( ICollectionMetadata::const_iterator it = srcMeta.begin(),
+                    end = srcMeta.end(); it != end; ++it ) {
+               if( dstMeta.existsKey( srcCollName + "|" + it.key() ) ) {
+                  exists = true;
+                  break;
+               }
+            }
+         } else {
+            for( ICollectionMetadata::const_iterator it = dstMeta.begin(),
+                    end = dstMeta.end(); it != end; ++it ) {
+               // true when the destination key starts with "<source name>|"
+               if( it.key().compare(0, srcCollName.length()+1, srcCollName+"|" ) == 0 ) {
+                  exists = true;
+                  break;
+               }
+            }
+         }
+         if( exists ) {
+            m_log << coral::Warning << "*** Metadata from source collection "
+                  << srcCollName << " already exists in destination collection "
+                  << destColl->description().name()
+                  << ". It will NOT be copied to any collection." << corENDL;
+            return false;
+         }
+      }
+   }
+   return true;
+}
+
+
+
+// Passes one key/value pair to a metadata handler, which is handed m_metadata
+// and the current copy mode. The handler registered for the key in
+// CollMetaRegistry is used when there is one, m_genericHandler otherwise.
+// If the registry cannot be obtained a message is printed to std::cout and
+// nothing is done.
+void MetaInfo::addMetaEntry(ICollMetaHandler::MetaKey key,
+                            ICollMetaHandler::MetaKey val,
+                            ICollMetaHandler::ProvNode srcCollection)
+{
+   // std::cout << "+++ key= " << key << "; val = " << val << std::endl;
+   ICollMetaHandler* h = m_genericHandler;
+   CollMetaRegistry* metareg = CollMetaRegistry::Instance();
+   if (metareg==0) {
+      std::cout << "Could not retrieve CollMetaRegistry" << std::endl;
+      return;
+   }
+   // see if there is a special handler for this key
+   CollMetaRegistry::Container::const_iterator it = metareg->seek(key);
+   if( it != metareg->end() )
+      h = it->second;
+   h->process(m_copyMode, val, m_metadata, srcCollection, key);
+}
+
+
+
+// Collects metadata from every input collection. Each source entry accepted by
+// shouldCopy() goes through addMetaEntry() with the source collection name
+// (its "Name" metadata entry if present, else the description name).
+// The command-line metadata is then added as well, once per input collection.
+void
+MetaInfo::readMetadata( std::vector<pool::ICollection*> inputs )
+{
+   for( vector<ICollection*>::const_iterator input = inputs.begin(),
+           end = inputs.end(); input != end; ++input )
+   {
+      ICollection* const &srcColl = *input;
+      ICollectionMetadata &srcMeta = srcColl->metadata();
+      string srcCollName = srcColl->description().name();
+      if( srcMeta.existsKey("Name") ) {
+         srcCollName = srcMeta.getValueForKey("Name");
+      }
+
+      for( ICollectionMetadata::const_iterator sit = srcMeta.begin(),
+              end = srcMeta.end(); sit != end; ++sit )
+      {
+         const string &key = sit.key();
+         if( shouldCopy(key) )
+            addMetaEntry( key, sit.value(), srcCollName );
+      }
+      m_log << coral::Info << "Source metadata size after processing collection "
+            << srcCollName << " is " << m_metadata.size() << corENDL;
+
+      // add new metadata from the commandline
+      // NOTE(review): the original remark here said this "ignores -nometadata",
+      // but the entries are filtered through shouldCopy(), which does consult
+      // m_noMetadata. The loop also sits inside the per-input loop, so it runs
+      // once per input collection and never when there are no inputs.
+      // Confirm both are intended.
+      for( MetaIterator i = m_cmdlineMetadata.begin(), end = m_cmdlineMetadata.end();
+           i != end; ++i ) {
+         const string &key = i->first;
+         if( shouldCopy(key) ) {
+            addMetaEntry( key, i->second );
+            m_log << coral::Debug << "adding commandline metadata " << key
+                  << "=" << i->second << corENDL;
+         }
+      }
+   }
+}
+
+
+// Writes the collected metadata into every output collection. A key missing in
+// the destination is set directly. For a key that already exists (except the
+// collection ID key, which is skipped) the destination value is first passed
+// through addMetaEntry() and the resulting m_metadata[key] is written back.
+// NOTE(review): addMetaEntry() and m_metadata[key] can add entries while the
+// loop iterates over begin()/end() - presumably the same container; confirm
+// that this is safe for the container type in use.
+void
+MetaInfo::writeMetadata(std::vector<pool::ICollection*> outputs)
+{
+   // write all metadata
+   for( vector<ICollection*>::const_iterator output = outputs.begin(),
+           end = outputs.end(); output != end; ++output )
+   {
+      ICollection* const &destColl = *output;
+      for( MetaInfo::MetaIterator mit = this->begin(), end = this->end(); mit != end; ++mit ) {
+         const string &key = mit->first;
+         if (!destColl->metadata().existsKey( key )) {
+            destColl->metadata().setValueForKey(key, mit->second);
+         } else {
+            // merge values for existing keys
+            if( key == CollectionBaseNames::CollIDMdataKey() )
+               continue; // ignore the GUID entry
+            string dstval( destColl->metadata().getValueForKey(key) );
+            // first add it to the container
+            addMetaEntry(key, dstval);
+            // then just take it from the container
+            destColl->metadata().setValueForKey( key, m_metadata[key] );
+         }
+         m_log << coral::Debug << "Metadata insert key=" << key << ", value len="
+               << m_metadata[key].length() << corENDL;
+      }
+   }
+}
+
diff --git a/Database/APR/CollectionUtilities/src/Progress.cpp b/Database/APR/CollectionUtilities/src/Progress.cpp
new file mode 100755
index 0000000000000000000000000000000000000000..ebc65a0f3be7f9d609c0559620b32195495901e3
--- /dev/null
+++ b/Database/APR/CollectionUtilities/src/Progress.cpp
@@ -0,0 +1,56 @@
+/*
+  Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
+*/
+
+#include "CollectionUtilities/Progress.h"
+
+#include <fstream>
+using namespace std;
+using namespace pool;
+
+const std::string progress_opt("-progress");
+
+// Argument group for the "-progress <target>" option; starts out not valid
+// and not writing to stdout.
+Progress::Progress()
+      : CmdLineArgs2( "ProgressMeter" )
+      , m_valid( false )
+      , m_stdout( false )
+{
+   QualList quals;
+   quals.insert( std::make_pair(progress_opt, ArgQual(1, false, false)) );
+   quals[progress_opt].desc << "Write out progress indicator to a given file ( 'stdout' prints to stdout )";
+   setArgQuals(quals);
+}
+
+
+// Evaluates the command line via the base class and, when that succeeded and
+// "-progress" is present, takes the output target from the argument position
+// recorded for the option. The target "stdout" switches to console output.
+// The object is marked valid in every case; the base-class result is returned.
+bool Progress::evalArgs(std::vector<std::string>& argv)
+{
+   const bool parsedOk = CmdLineArgs2::evalArgs(argv);
+   if( parsedOk && hasQual(progress_opt) ) {
+      int argPos = (*this)[progress_opt].first;
+      m_outFName = argv[argPos];
+   }
+   if( m_outFName == "stdout" ) {
+      m_stdout = true;
+   }
+   m_valid = true;
+   return parsedOk;
+}
+
+
+// Emits one progress record "<percentage>;<action>". In file mode the file is
+// reopened with truncation for every record and closed again afterwards.
+// Does nothing until evalArgs() has marked the object valid.
+void Progress::print( const std::string& action, int percentage )
+{
+   if( !m_valid ) return;
+
+   const bool toStdout = m_stdout;
+   std::streambuf *sink = std::cout.rdbuf();
+   if( !toStdout ) {
+      m_file.open( m_outFName.c_str(), ios_base::trunc | ios_base::out );
+      sink = m_file.rdbuf();
+   }
+
+   std::ostream out(sink);
+   out << percentage << ";" << action << endl;
+
+   if( !toStdout ) m_file.close();
+}
diff --git a/Database/APR/CollectionUtilities/src/QueryInfo.cpp b/Database/APR/CollectionUtilities/src/QueryInfo.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..97b923b51d9ad40862db0c9a07b4df248a2b4870
--- /dev/null
+++ b/Database/APR/CollectionUtilities/src/QueryInfo.cpp
@@ -0,0 +1,175 @@
+/*
+  Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
+*/
+
+
+#include "CollectionUtilities/QueryInfo.h"
+#include "CollectionUtilities/SrcInfo.h"
+#include "CoralBase/MessageStream.h"
+#include "CollectionUtilities/Args2Container.h"
+
+#include "CollectionBase/ICollection.h"
+#include
"CollectionBase/ICollectionDescription.h" +#include "CollectionBase/ICollectionColumn.h" +#include "CollectionBase/ICollectionCursor.h" +#include "CollectionBase/CollectionRowBuffer.h" +#include "CollectionBase/CollectionFactory.h" + +#include "CollectionBase/ICollectionQuery.h" +#include "CollectionBase/ICollectionCursor.h" +#include "CollectionBase/CollectionRowBuffer.h" +#include "CoralBase/Attribute.h" + +#include "POOLCore/Exception.h" + +#include <memory> +#include <set> +#include <sstream> + +#include <iostream> + +#define corENDL coral::MessageStream::endmsg + +using namespace std; +using namespace pool; + + + +QueryInfo::QueryInfo( ) : CmdLineArgs2("QueryInfo") +{ + QualList markers; + markers["-query"] = ArgQual("[predicate string (DEFAULT = \"\")]", 1, true); + markers["-queryopt"].desc << "<comma separated attribute list> (DEFAULT = * )" << endl + << "if the Primary token is missing from the list, the first listed token will become the Primary"; + markers["-trigqruns"].desc << "runs for Trigger Query bit position Decoding, format: 1,3,5-12,22" + << " NOTE: this option disables the autodetection of run numbers"; + markers["-trigqproject"].desc << "MC project name for Trigger Query (MC only, not data!)"; + markers["-trigqamitag"].desc << "MC project AMI tag for Trigger Query (MC only, not data!)"; + + this->setArgQuals(markers); +} + + +const std::string& +QueryInfo::query( size_t qn ) +{ + if( m_queries.size() == 0 ) + m_queries.push_back(string("")); + if( m_queries.size() == 1 ) + return m_queries[0]; + if( m_queries.size() > qn ) + return m_queries[qn]; + throw pool::Exception( "Attempt to get query# bigger than supplied on the command line", + "QueryInfo::query(N)", "QueryInfo" ); +} + + + +bool +QueryInfo::evalArgs(std::vector<std::string>& argv) +{ + if( !CmdLineArgs2::evalArgs(argv) ) return false; + + getOpts("-query", m_queries); + getOpt("-queryopt", m_queryOptions); + + return true; +} + + +bool 
+QueryInfo::evalArgsPass2(std::vector<std::string>& , pool::Args2Container& container) +{ + if( query().find("TriggerPassed") == string::npos && query().find("TriggerNotPassed") == string::npos ) + return true; + + if( coral::MessageStream::msgVerbosity() == coral::Debug + || coral::MessageStream::msgVerbosity() == coral::Verbose ) { + queryRemapper.setDebug(); + } + + string amitag, project; + if( getOpt("-trigqproject", project) ) { + queryRemapper.setDataPeriod( project ); + } + if( getOpt("-trigqamitag", amitag) ) { + queryRemapper.setAMITag( amitag ); + } + + SrcInfo* srcinfo = 0; + for( Args2Container::const_iterator ait = container.begin(); ait != container.end(); ++ait) { + if( (*ait)->name()=="SrcInfo" ) { + srcinfo = dynamic_cast<SrcInfo*>( *ait ); + } + } + if( !srcinfo ) { + container.m_log << coral::Error << "QueryInfo could not find SourceInfo" << corENDL; + return false; + } + + string runs; + bool userruns = getOpt("-trigqruns", runs); + + string srcTech; + CollectionFactory *factory = CollectionFactory::get(); + set<unsigned> runset; + for( unsigned int i=0; i<srcinfo->nSrc(); i++ ) { + // get the technology without opening the collection + CollectionDescription desc( srcinfo->name(i), srcinfo->type(i), srcinfo->connect() ); + factory->resolveDescription( desc ); + + if( !userruns ) { + // need to find out which runs for the trigger query + auto_ptr<ICollection> collection( factory->create( desc, ICollection::READ ) ); + auto_ptr<ICollectionQuery> collQuery( collection->newQuery() ); + collQuery->addToOutputList( "RunNumber" ); + ICollectionCursor& cursor = collQuery->execute(); + while( cursor.next() ) { + unsigned runN = cursor.currentRow().attributeList()["RunNumber"].data<unsigned>(); + runset.insert( runN ); + } + } + string tech = desc.type(); + if( i==0 ) { + srcTech = tech; + continue; + } + if( srcTech != tech ) { + container.m_log << coral::Error << "can not mix up imput ROOT and Relational " + << " collection types when querying 
triggers" << corENDL; + return false; + } + } + + if( !userruns ) { + // convert autodetected runs into a string version + ostringstream runstr; + unsigned rbegin = 0, rlast = 0; + bool loopend = false; + for( set<unsigned>::const_iterator r = runset.begin(); !loopend; rlast = *r, ++r) { + // loopend condition here to provide one more final loop iteration + loopend = ( r == runset.end() ); + if( !loopend && rlast && *r == rlast+1 ) { + // start range + if( !rbegin ) rbegin = rlast; + } else { + if( runstr.str().length() ) runstr << ","; + if( rbegin ) { + // there was a range + runstr << rbegin << "-" << rlast; + rbegin = 0; + } else { + if( rlast ) runstr << rlast; + } + } + } + runs = runstr.str(); + container.m_log << coral::Info << "Autodetected runs for trigger conditions: " << runs << corENDL; + } + + queryRemapper.queryRuns( runs ); + m_queries[0] = queryRemapper.triggerQueryRemap( m_queries[0], srcTech ); + + return true; +} + diff --git a/Database/APR/CollectionUtilities/src/UtilityFuncs.cpp b/Database/APR/CollectionUtilities/src/UtilityFuncs.cpp new file mode 100644 index 0000000000000000000000000000000000000000..db7b65556b6b028bab5a1b1fe958c3c6573bc16c --- /dev/null +++ b/Database/APR/CollectionUtilities/src/UtilityFuncs.cpp @@ -0,0 +1,316 @@ +/* + Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration +*/ + +/** + * @file UtilityFuncs.h + * @brief prototypes for utility POOL collection funcs + * @author Jack.Cranshaw@cern.ch + * $Id: UtilityFuncs.cpp 495625 2012-04-13 13:54:17Z mnowak $ + */ + +#include "CollectionBase/ICollection.h" +#include "CollectionBase/CollectionColumn.h" + +#include "CoralBase/MessageStream.h" +#include "POOLCore/Exception.h" + +#include "CoralBase/Attribute.h" + +#include <iostream> +#include <fstream> +#include <memory> + +#include "CollectionUtilities/UtilityFuncs.h" + +using namespace std; +using namespace pool; + +std::string pool::getKey(const std::string key, const std::string encoded) +{ + size_t 
keystart = encoded.find(key); + if (keystart!=string::npos) { + size_t keylen = encoded.find(";",keystart)-keystart; + //cout << "key begins at " << keystart << " with length " << keylen << " for " << encoded << endl; + if (keystart+keylen<encoded.size()) { + return encoded.substr(keystart+key.size(),keylen-key.size()); + } + else return ("NONE"); + } + else { + return string("NONE"); + } +} + +std::string pool::getGroup(const std::string encoded) +{ + return getKey("GRP=",encoded); +} + +std::set<std::string> pool::findGroups(const pool::ICollectionDescription& desc) +{ + set<string> groups; + // Assume only one fragment for now + for (int col = 0; col<desc.numberOfAttributeColumns(0); ++col) { + const pool::ICollectionColumn& coldata = desc.attributeColumn(col,0); + string groupdata = getGroup(coldata.annotation()); + set<string>::iterator it = groups.find(groupdata); + if (it==groups.end()) groups.insert(groupdata); + } + return groups; +} + +std::pair<unsigned int,unsigned int> +pool::getColumnWidths(const pool::ICollectionDescription& description, bool tokens=false) +{ + unsigned int maxNameSize=0,maxTypeNameSize=0; + if (tokens) { + for( int f=0; f < description.numberOfCollectionFragments(); f++ ) { + for( int c=0; c < description.numberOfTokenColumns( f ); c++ ) { + const pool::ICollectionColumn& column = description.tokenColumn(c, f); + if( maxNameSize < column.name().size() ) + maxNameSize = column.name().size(); + if( maxTypeNameSize < column.type().size() ) + maxTypeNameSize = column.type().size(); + } + } + } + else { + for( int f=0; f < description.numberOfCollectionFragments(); f++ ) { + for( int c=0; c < description.numberOfAttributeColumns( f ); c++ ) { + const pool::ICollectionColumn& column = description.attributeColumn(c, f); + if( maxNameSize < column.name().size() ) + maxNameSize = column.name().size(); + if( maxTypeNameSize < column.type().size() ) + maxTypeNameSize = column.type().size(); + } + } + } + return 
std::make_pair(maxNameSize,maxTypeNameSize); +} + +void pool::dumpGroups(const pool::ICollectionDescription& description) +{ + set<string> groups = findGroups(description); + std::cout << "--------------------------------------------------------------" << std::endl; + if (groups.find("NONE")==groups.end()) { + std::cout << "Number of groups is: " << groups.size() << std::endl; + std::cout << "Groups are: " <<std::endl; + // dump groups + set<string>::const_iterator git = groups.begin(); + while (git!=groups.end()) { + cout << " " << *git << endl; + ++git; + } + } + else { + std::cout << "Number of groups is: 0" << std::endl; + } +} + +void pool::dumpTokens(const pool::ICollectionDescription& description) +{ + string spectype("Token"); + std::pair<unsigned int,unsigned int> maxpair = getColumnWidths(description,true); + unsigned int maxNameSize = maxpair.first; + unsigned int maxTypeNameSize = maxpair.second; + std::cout << "--------------------------------------------------------------" << std::endl; + std::cout << "Number of " << spectype << "s is: " + << description.numberOfTokenColumns() << std::endl; + std::cout << spectype << "s are: " << std::endl; + for( int f=0; f < description.numberOfCollectionFragments(); f++ ) { + for( int c=0; c < description.numberOfTokenColumns( f ); c++ ) { + const pool::ICollectionColumn& column = description.tokenColumn(c, f); + std::cout.setf(std::ios::left); + std::cout << " NAME: "; + std::cout.width(maxNameSize+5); + std::cout << column.name(); + std::cout << " TYPE: "; + std::cout.width(maxTypeNameSize+5); + std::cout << column.type(); + std::cout << " INFO: "; + std::cout << column.annotation(); + std::cout << std::endl; + } + } + std::cout << " DEFAULT REF = " << description.eventReferenceColumnName() << std::endl; +} + +void pool::dumpAttributes(const pool::ICollectionDescription& description) +{ + string spectype("Attribute"); + std::pair<unsigned int,unsigned int> maxpair = getColumnWidths(description); + unsigned int 
maxNameSize = maxpair.first; + unsigned int maxTypeNameSize = maxpair.second; + std::cout << "--------------------------------------------------------------" << std::endl; + std::cout << "Number of " << spectype << "s is: " + << description.numberOfAttributeColumns() << std::endl; + std::cout << spectype << "s are: " << std::endl; + for( int f=0; f < description.numberOfCollectionFragments(); f++ ) { + for( int c=0; c < description.numberOfAttributeColumns( f ); c++ ) { + const pool::ICollectionColumn& column = description.attributeColumn(c, f); + std::cout.setf(std::ios::left); + std::cout << " NAME: "; + std::cout.width(maxNameSize+5); + std::cout << column.name(); + std::cout << " TYPE: "; + std::cout.width(maxTypeNameSize+5); + std::cout << column.type(); + std::cout << " INFO: "; + std::cout << column.annotation(); + std::cout << std::endl; + } + } +} + +void pool::dumpOverlapEval(const pool::ICollectionDescription& desc0, + const pool::ICollectionDescription& desc1, + const vector< pair<string,string> >& spec0, + const vector< pair<string,string> >& spec1, + const string spectype) +{ + // First check that inputs are valid + vector< vector< pair<string,string> > > Specs; + Specs.push_back(spec0); + Specs.push_back(spec1); + vector<string> Descs; + Descs.push_back(desc0.name()); + Descs.push_back(desc1.name()); + vector< pair<string,string> > tmp(max(spec0.size(),spec1.size())); + vector< pair<string,string> >::iterator last = + set_intersection( Specs[0].begin(),Specs[0].end(), + Specs[1].begin(),Specs[1].end(), + tmp.begin() ); + vector< pair<string,string> > output(tmp.begin(),last); + unsigned int intersize = output.size(); + if (intersize==Specs[0].size() && + Specs[0].size()==Specs[1].size()) { + std::cout << "--------------------------------------------------------------" << std::endl; + std::cout << " ***" << std::endl; + std::cout << " *** " << spectype << "s IDENTICAL for these Collections ***" << std::endl; + std::cout << " ***" << std::endl; + if 
(spectype=="Token") dumpTokens(desc0); + else if (spectype=="Attribute") dumpAttributes(desc0); + else cerr << "dumpOverlapEval ERROR: unknown spectype = " << spectype << endl; + } + else { + std::cout << "--------------------------------------------------------------" << std::endl; + std::cout << " ***" << std::endl; + std::cout << " *** " << spectype << "s DIFFER for these Collections ***" << std::endl; + std::cout << " ***" << std::endl; + std::cout << "--------------------------------------------------------------" << std::endl; + std::cout << "Number of Common " << spectype << " Names is: " + << output.size() << std::endl; + // First dump intersection + std::cout << "Common " << spectype << " Names are: " << std::endl; + std::ofstream intersect_names; + std::string name("intersect_names."+spectype); + intersect_names.open(name.c_str(),ios::out | ios::trunc); + for( vector< pair<string,string> >::iterator c = output.begin(); + c < output.end(); + c++ ) { + std::cout << " NAME: "; + std::cout << c->first; + std::cout << " TYPE: "; + std::cout << c->second; + std::cout << std::endl; + intersect_names << c->first << std::endl; + } + intersect_names.close(); + for (int i=0; i<2; ++i) { + int j; + if (i==0) j=1; else j=0; + std::ofstream names; + if (Specs[i].size() > intersize) { + std::cout << "Number of " << spectype << " Names Specific to " + << Descs[i] << " is: " + << Specs[i].size() - intersize << std::endl; + std::cout << spectype << " Names are: " << std::endl; + std::string name(Descs[i]+"Only."+spectype); + names.open(name.c_str(), ios::out | ios::trunc); + vector< pair<string,string> > tmp(max(Specs[0].size(),Specs[1].size())); + vector< pair<string,string> >::iterator last2 = + set_difference(Specs[i].begin(),Specs[i].end(), + Specs[j].begin(),Specs[j].end(), + tmp.begin()); + vector< pair<string,string> > solo(tmp.begin(),last2); + for( vector< pair<string,string> >::iterator c = solo.begin(); + c < solo.end(); + c++ ) { + std::cout << " NAME: "; + 
std::cout << c->first; + std::cout << " TYPE: "; + std::cout << c->second; + std::cout << std::endl; + names << c->first << std::endl; + } + } + names.close(); + } + } +} + +/* +bool isNonProvEqual(const pool::ICollectionDescription& desc0, + const pool::ICollectionDescription& desc1) { + if( desc0.equals(desc1) ) return true; + else { + map<int, vector<pair<string,string> > > tokSpecs; + vector< pair<string,string> > spec; + spec.clear(); + for( int f=0; f < desc0.numberOfCollectionFragments(); f++ ) { + for( int c=0; c < desc0.numberOfTokenColumns( f ); c++ ) { + const pool::ICollectionColumn& column = desc0.tokenColumn(c, f); + spec.push_back(make_pair(column.name(),column.type())); + } + } + tokSpecs.insert(make_pair(0,spec)); + spec.clear(); + for( int f=0; f < desc1.numberOfCollectionFragments(); f++ ) { + for( int c=0; c < desc1.numberOfTokenColumns( f ); c++ ) { + const pool::ICollectionColumn& column = desc1.tokenColumn(c, f); + spec.push_back(make_pair(column.name(),column.type())); + } + } + tokSpecs.insert(make_pair(1,spec)); + stable_sort(tokSpecs[0].begin(),tokSpecs[0].end()); + stable_sort(tokSpecs[1].begin(),tokSpecs[1].end()); + vector< pair<string,string> > spec0 = tokSpecs[0]; + vector< pair<string,string> > spec1 = tokSpecs[1]; + vector< pair<string,string> > tmp(max(spec0.size(),spec1.size())); + vector< pair<string,string> >::iterator last2 = + set_difference(spec0.begin(),spec0.end(), + spec1.begin(),spec1.end(), + tmp.begin()); + vector< pair<string,string> > solo(tmp.begin(),last2); + if (solo.size()==1) { + if (solo[0] == ) + } +} +*/ + +// Stole this little snippet from OOPWeb.com +// http://oopweb.com/CPP/Documents/CPPHOWTO/Volume/C++Programming-HOWTO-7.html +// to fill in a basic failure in std::string +// Essentially it does for C++ what strtok does for C + +void pool::Tokenize(const std::string& str, + std::vector<std::string>& tokens, + const std::string& delimiters) +{ + // Skip delimiters at beginning. 
+ std::string::size_type lastPos = str.find_first_not_of(delimiters, 0); + // Find first "non-delimiter". + std::string::size_type pos = str.find_first_of(delimiters, lastPos); + + while (std::string::npos != pos || std::string::npos != lastPos) + { + // Found a token, add it to the vector. + tokens.push_back(str.substr(lastPos, pos - lastPos)); + // Skip delimiters. Note the "not_of" + lastPos = str.find_first_not_of(delimiters, pos); + // Find next "non-delimiter" + pos = str.find_first_of(delimiters, lastPos); + } +} + diff --git a/Database/APR/CollectionUtilities/utilities/CollAppend.cpp b/Database/APR/CollectionUtilities/utilities/CollAppend.cpp new file mode 100755 index 0000000000000000000000000000000000000000..f7c9324fc89c7033b462172f861741d737ebe02f --- /dev/null +++ b/Database/APR/CollectionUtilities/utilities/CollAppend.cpp @@ -0,0 +1,27 @@ +/* + Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration +*/ + +/** + * @file CollAppend.cpp + * @brief Utility to copy and merge POOL collections - uses CollAppendBase class + * @author Marcin.Nowak@cern.ch + * $Id: CollAppend.cpp 458059 2011-09-09 13:52:47Z mnowak $ + */ + +#include "CollectionUtilities/CollAppendBase.h" + +using namespace std; +using namespace pool; + +int main(int argc, const char *argv[]) +{ + // Convert argv to vector of strings + vector<string> argv_v; + for( int i=0; i<argc; ++ i ) + argv_v.push_back( argv[i] ); + + CollAppendBase worker("CollAppend"); + return worker.execute( argv_v ); +} + diff --git a/Database/APR/CollectionUtilities/utilities/CollListAttrib.cpp b/Database/APR/CollectionUtilities/utilities/CollListAttrib.cpp new file mode 100755 index 0000000000000000000000000000000000000000..118be65755d26799fe6f0482aea4f91122cbcf71 --- /dev/null +++ b/Database/APR/CollectionUtilities/utilities/CollListAttrib.cpp @@ -0,0 +1,219 @@ +/* + Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration +*/ + +/** + * @file CollListAttrib.cpp + * @brief Utility to 
list the metadata of a POOL collection + * @author K. Karr <Kristo.Karr@cern.ch> + * @author Marcin.Nowak@cern.ch + * $Id: CollListAttrib.cpp 506773 2012-06-21 15:40:21Z gemmeren $ + */ + +#include "PersistentDataModel/Token.h" + +#include "CollectionBase/ICollection.h" +#include "CollectionBase/ICollectionDescription.h" +#include "CollectionBase/ICollectionColumn.h" +#include "CollectionBase/ICollectionCursor.h" +#include "CollectionBase/CollectionRowBuffer.h" +#include "CollectionBase/CollectionService.h" + +#include "POOLCore/Exception.h" + +#include "CoralBase/AttributeList.h" +#include "CoralBase/Attribute.h" +#include "CoralBase/MessageStream.h" + +#include "CollectionUtilities/Args2Container.h" +#include "CollectionUtilities/CmdLineArgs2.h" +#include "CollectionUtilities/ArgQual.h" +#include "CollectionUtilities/SrcInfo.h" +#include "CollectionUtilities/CatalogInfo.h" +#include "CollectionUtilities/UtilityFuncs.h" + +#include <iostream> +#include <iomanip> +#include <fstream> + +using namespace std; +using namespace pool; + + +int main(int argc, const char *argv[]) +{ + string thisProgram("CollListAttrib"); + + try { + pool::CollectionService collectionService; + coral::MessageStream log( thisProgram ); + + Args2Container argsVec( thisProgram, true, &log ); + argsVec.desc << "Lists the attributes of a collection " + << "or pair of collections. " << endl << "If it is a pair, it will also " + << "tell which attributes are shared and which are not. " << endl + << "It will also print this information to files." 
; + + // CLA specific qualifiers + pool::QualList markers; + markers["-nogroups"] = ArgQual("Do not print group information", 0); + markers["-notokens"] = ArgQual("Do not print token information", 0); + markers["-noattribs"] = ArgQual("Do not print attribute information", 0); + + pool::CmdLineArgs2 cmdLineArgs; + cmdLineArgs.setArgQuals(markers); + argsVec.push_back(&cmdLineArgs); // Add it to the list + + // Classes with shared cli keys and their argument properties + // Add them to the list + // SrcInfo : input collections + // CatInfo : collection catalogs + CatalogInfo catinfo; argsVec.push_back(&catinfo); + SrcInfo srcinfo; argsVec.push_back(&srcinfo); + + // Check that all cmd line args are valid + if( !argsVec.evalArgs(argc, argv) ) return 1; + if( !argsVec.checkValid() ) return 1; + + bool doGroups = !cmdLineArgs.hasQual("-nogroups"); + bool doTokens = !cmdLineArgs.hasQual("-notokens"); + bool doAtts = !cmdLineArgs.hasQual("-noattribs"); + + if (srcinfo.nSrc()>2) { + cerr << "This program takes a max of 2 input collections" << endl; + cerr << "There were " << srcinfo.nSrc() << " inputs collections on the command line" << endl; + return 1; + } + + catinfo.setCatalogs( &collectionService ); + + unsigned int collCounter = 0; + std::string attribName = ""; + std::string attribTypeName = ""; + + map<int, pool::ICollection*> collMap; + map<int, vector<pair<string,string> > > tokSpecs; + map< int, vector< pair<string,string> > > attSpecs; + + for( unsigned int i=0; i<srcinfo.nSrc(); i++ ) { + bool readOnly( true ); + pool::ICollection* collection; + try { + collection = collectionService.handle( srcinfo.name(i), srcinfo.type(i), srcinfo.connect(), readOnly ); + } + catch ( pool::Exception& poolException ) { + std::cerr << "pool::Exception: " << poolException.what() << std::endl;; + cout << "Unable to open collection " << srcinfo.name(i) << endl; + cout << "---> STOPPING HERE" << endl; + return 5; + } + + const pool::ICollectionDescription &description = 
collection->description(); + collMap[i] = collection; + collCounter++; + vector< pair<string,string> > spec; + for( int f=0; f < description.numberOfCollectionFragments(); f++ ) { + for( int c=0; c < description.numberOfAttributeColumns( f ); c++ ) { + const pool::ICollectionColumn& column = description.attributeColumn(c, f); + spec.push_back(make_pair(column.name(),column.type())); + } + } + attSpecs[i]=spec; + spec.clear(); + for( int f=0; f < description.numberOfCollectionFragments(); f++ ) { + for( int c=0; c < description.numberOfTokenColumns( f ); c++ ) { + const pool::ICollectionColumn& column = description.tokenColumn(c, f); + spec.push_back(make_pair(column.name(),column.type())); + } + } + tokSpecs[i]=spec; + } + + + std::cout << "--------------------------------------------------------------" << std::endl; + std::cout << "Collection list: " << endl; + for (unsigned int i=0; i < collCounter; ++i) { + std::cout << "NAME: "; + std::cout << collMap[i]->description().name(); + std::cout << " TYPE: "; + std::cout << collMap[i]->description().type(); + std::cout << " NFRAG: "; + std::cout << collMap[i]->description().numberOfCollectionFragments(); + std::cout << std::endl; + } + + if (doGroups) dumpGroups(collMap[0]->description()); + + if (collCounter==1) { + // For a single input, just dump the description + if (doTokens) dumpTokens(collMap[0]->description()); + if (doAtts) dumpAttributes(collMap[0]->description()); + } + else if (collCounter == 2 && + collMap[0]->description().equals(collMap[1]->description()) + ) { + // If the two have identical descriptions, then dump the first one + std::cout << "--------------------------------------------------------------" << std::endl; + std::cout << " ***" << std::endl; + std::cout << " *** Tokens and Attributes IDENTICAL for these Collections ***" << std::endl; + std::cout << " ***" << std::endl; + if (doTokens) dumpTokens(collMap[0]->description()); + if (doAtts) dumpAttributes(collMap[0]->description()); + } + 
else { + // If they differ, then + // -- First, Look at token differences + // -- If not any, then dump tokens + // -- If differences exist, dump common tokens, then differences + // -- also write 3 files: common.tokens, <coll1>Only.tokens, <coll2>Only.tokens + // -- Next, look at attribute differences + // -- If not any, then dump attributes + // -- If differences exist, dump common attributes, then differences + // -- also write 3 files: common.atts, <coll1>Only.atts, <coll2>Only.atts + + stable_sort(tokSpecs[0].begin(),tokSpecs[0].end()); + stable_sort(tokSpecs[1].begin(),tokSpecs[1].end()); + stable_sort(attSpecs[0].begin(),attSpecs[0].end()); + stable_sort(attSpecs[1].begin(),attSpecs[1].end()); + // Look at token intersection first + if (doTokens) dumpOverlapEval(collMap[0]->description(), + collMap[1]->description(), + tokSpecs[0], + tokSpecs[1], + "Token"); + // Look at attribute intersection second + if (doAtts) dumpOverlapEval(collMap[0]->description(), + collMap[1]->description(), + attSpecs[0], + attSpecs[1], + "Attribute"); + } + std::cout << "---------------------------------------------------------" << std::endl; + std::cout << std::endl; + std::cout << "Number of collections scanned is: " + << collCounter << std::endl; + std::cout << std::endl; + + for( unsigned int i=0; i < srcinfo.nSrc(); i++ ) { + collMap[i]->close(); + } + + return 0; + } + catch( pool::Exception& poolException ) + { + std::cerr << "pool::Exception: " << poolException.what() << std::endl;; + return 1; + } + catch( std::exception& exception ) + { + std::cerr << "std::exception: " << exception.what() << std::endl;; + return 1; + } + catch( ... ) + { + std::cerr << "Unknown exception caught in main()." 
<< std::endl;; + return 1; + } +} + diff --git a/Database/APR/CollectionUtilities/utilities/CollListFileGUID.cpp b/Database/APR/CollectionUtilities/utilities/CollListFileGUID.cpp new file mode 100755 index 0000000000000000000000000000000000000000..bc720296c16d40511a9e4120fb5d4f31a6d73525 --- /dev/null +++ b/Database/APR/CollectionUtilities/utilities/CollListFileGUID.cpp @@ -0,0 +1,259 @@ +/* + Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration +*/ + +/** + * @file CollListFileGUID.cpp + * @brief Utility to list the file GUID's used by a POOL collection + * @author K. Karr <Kristo.Karr@cern.ch> + * @author Marcin.Nowak@cern.ch + * $Id: CollListFileGUID.cpp 527111 2012-11-21 21:11:08Z gemmeren $ + */ + +#include "PersistentDataModel/Token.h" + +#include "CollectionBase/ICollection.h" +#include "CollectionBase/ICollectionRelationalExtensions.h" +#include "CollectionBase/TokenList.h" +#include "CollectionBase/ICollectionQuery.h" +#include "CollectionBase/ICollectionCursor.h" +#include "CollectionBase/CollectionRowBuffer.h" +#include "CollectionBase/CollectionService.h" +#include "CollectionBase/CollectionDescription.h" + +#include "PersistencySvc/ISession.h" +#include "PersistencySvc/DatabaseConnectionPolicy.h" +#include "PersistencySvc/ITransaction.h" + +#include "CoralBase/MessageStream.h" +#include "POOLCore/Exception.h" + +#include "CollectionUtilities/Args2Container.h" +#include "CollectionUtilities/SrcInfo.h" +#include "CollectionUtilities/QueryInfo.h" +#include "CollectionUtilities/CatalogInfo.h" + +#include <iostream> +#include <iomanip> +#include <memory> +using namespace std; +using namespace pool; + + + + +void printGroupedGuids( const ICollectionGUIDQuery::CountedGroupedGUIDs& guids ) +{ + cout << "Event count per distinct GUIDs group:" << endl; + cout << " #Events"; + for( vector<string>::const_iterator tok = guids.tokenNames.begin(), + end = guids.tokenNames.end(); + tok != end; ++tok ) { + cout << std::setw(37) << *tok; + } + cout << 
endl; + + for( vector< ICollectionGUIDQuery::countedGUIDGroup_t >::const_iterator + row = guids.groupedGUIDRows.begin(), end = guids.groupedGUIDRows.end(); + row != end; ++row ) { + cout << std::setw(11) << row->first << " " ; + for( vector<string>::const_iterator tok = row->second.begin(), + end = row->second.end(); + tok != end; ++tok ) { + cout << *tok << " "; + } + cout << endl; + } +} + + +void printCountedGuids( const ICollectionGUIDQuery::CountedGroupedGUIDs& guids ) +{ + // mapping GUID -> (token_name, count) + map<string, pair<string,unsigned> > count; + unsigned tokenN = guids.tokenNames.size(); + for( vector< ICollectionGUIDQuery::countedGUIDGroup_t >::const_iterator + row = guids.groupedGUIDRows.begin(), end = guids.groupedGUIDRows.end(); + row != end; ++row ) { + unsigned n = row->first; + for( unsigned i = 0; i<tokenN; i++ ) { + const string& guid = row->second[i]; + if( count.find(guid) == count.end() ) + count[guid] = make_pair(guids.tokenNames[i],0); + count[guid].second += n; + } + } + for( map<string, pair<string,unsigned> >::const_iterator + row = count.begin(), end = count.end(); row != end; ++row ) { + cout << row->first << " " + << setw(20) << setiosflags(ios::left) << row->second.first + << " (" << row->second.second << ")" << endl; + } +} + + + +int main(int argc, const char *argv[]) +{ + string thisProgram("CollListFileGUID"); + + try + { + coral::MessageStream log( thisProgram ); + pool::CollectionService collectionService; + + // vector of CmdLineArg objects + Args2Container argsVec(thisProgram, true, &log); + argsVec.desc << thisProgram + << " is a tool for listing POOL file GUIDs referenced from the source event collection(s). 
" + << endl; + + QualList markers; + markers.insert( make_pair("-maintoken", ArgQual(0)) ); + markers["-maintoken"].desc << "Print only GUIDs from the primary collection Token"; + markers.insert( make_pair("-grouped", ArgQual(0)) ); + markers["-grouped"].desc << "Relational ONLY: Group and count events that have the same GUIDs in selected tokens"; + + CmdLineArgs2 cmdLineArgs; + cmdLineArgs.setArgQuals(markers); + argsVec.push_back(&cmdLineArgs); + + CatalogInfo catinfo; argsVec.push_back(&catinfo); + QueryInfo queryinfo; argsVec.push_back(&queryinfo); + SrcInfo srcinfo; argsVec.push_back(&srcinfo); + + // Check that all cmd line args are valid + if( !argsVec.evalArgs(argc, argv) ) return 1; + if( !argsVec.checkValid() ) return 1; + + bool mainToken = cmdLineArgs.hasQual("-maintoken"); + bool countGroupedGuids = cmdLineArgs.hasQual("-grouped"); + + if( mainToken && queryinfo.queryOptions().size() ) { + log << coral::Error << "-maintoken and -queryopt flags are exclusive" + << coral::MessageStream::endmsg; + exit(-2); + } + + catinfo.setCatalogs( &collectionService ); + + size_t row_count(0); + std::map< Guid, int > fileIdMap; + std::map< std::string, int > emptyTokensCount; + std::map< Guid, std::string> guidtoNameMap; + + for( unsigned int i=0; i<srcinfo.nSrc(); i++ ) { + bool readOnly( true ); + pool::ICollection *collection = collectionService.handle( srcinfo.name(i), srcinfo.type(i), srcinfo.connect(), readOnly ); + if( mainToken ) { + log << coral::Info << "Collection: " << collection->description().name() + << ", Primary Token name is: " << collection->description().eventReferenceColumnName() + << coral::MessageStream::endmsg; + } + log << coral::Debug << "Creating query for the collection" << coral::MessageStream::endmsg; + + if( collection->description().type() == "RelationalCollection" ) { + // the fast query version for relational collections + // (gets only references to the Link table) + auto_ptr<pool::ICollectionGUIDQuery> + collQuery( 
dynamic_cast<ICollectionRelationalExtensions*>(collection)->newGUIDQuery() ); + collQuery->setCondition( queryinfo.query(i) ); + if( mainToken ) { + collQuery->addToOutputList( collection->description().eventReferenceColumnName() ); + } else if( queryinfo.queryOptions().size() ) { + collQuery->addToOutputList( queryinfo.queryOptions() ); + } else { + collQuery->selectAllTokens(); + } + log << coral::Debug << "Executing fast GUID query for a relational collection" + << coral::MessageStream::endmsg; + if( countGroupedGuids ) { + printGroupedGuids( collQuery->getGroupedGUIDs() ); + } else { + printCountedGuids( collQuery->getGroupedGUIDs() ); + } + } else { + // standard query version - read all rows and collect GUIDs + pool::ICollectionQuery *collQuery = collection->newQuery(); + collQuery->setRowCacheSize( 10000 ); + collQuery->setCondition( queryinfo.query(i) ); + if( mainToken ) { + // get the main token only by default + } else if( queryinfo.queryOptions().size() ) { + collQuery->skipEventReference( true ); + collQuery->addToOutputList( queryinfo.queryOptions() ); + } else { + collQuery->selectAllTokens(); + } + log << coral::Debug + << ">> Executing the query " << coral::MessageStream::endmsg; + pool::ICollectionCursor& cursor = collQuery->execute(); + + while( cursor.next() ) { + if( ++row_count % 10000 == 0 ) { + log << coral::Debug + << "Read in " << row_count << " rows" << coral::MessageStream::endmsg; + } + const pool::TokenList &tokens = cursor.currentRow().tokenList(); + for( pool::TokenList::const_iterator iter = tokens.begin(); + iter != tokens.end(); ++iter ) { + Guid fileId = iter->dbID(); + if( fileId != Guid::null() ) { + map< Guid, int >::iterator mapIter = fileIdMap.find( fileId ); + if( mapIter == fileIdMap.end() ) { + fileIdMap[ fileId ] = 1; + guidtoNameMap[ fileId ] = iter.tokenName(); + } else { + ++(mapIter->second); + } + } else { + // empty token - count them + map< string, int >::iterator mapIter = emptyTokensCount.find( iter.tokenName() 
); + if( mapIter == emptyTokensCount.end() ) { + emptyTokensCount[iter.tokenName()] = 1; + } else { + ++(mapIter->second); + } + } + } + } + + std::map<Guid, int>::iterator it = fileIdMap.begin(); + while( it!=fileIdMap.end() ) { + std::string tName = "UNKNOWN"; + std::map<Guid, std::string>::iterator name_it = guidtoNameMap.find(it->first); + if( name_it != guidtoNameMap.end() ) + tName = name_it->second; + std::cout << it->first.toString() << " " << setw(20) << setiosflags(ios::left) << tName + << " (" << it->second << ")" << std::endl; + ++it; + } + map< string, int >::iterator it2 = emptyTokensCount.begin(); + while( it2 != emptyTokensCount.end() ) { + std::cout << "00000000-0000-0000-0000-000000000000" << " " << setw(20) << setiosflags(ios::left) + << it2->first << " (" << it2->second << ")" << std::endl; + ++it2; + } + } + collection->close(); + delete collection; + } + return 0; + + } + catch( pool::Exception& poolException ) + { + std::cerr << "pool::Exception: " << poolException.what() << std::endl; + return 1; + } + catch( std::exception& exception ) + { + std::cerr << "std::Exception: " << exception.what() << std::endl; + return 1; + } + catch( ... ) + { + std::cerr << "Unknown exception. 
" << std::endl; + return 1; + } +} diff --git a/Database/APR/CollectionUtilities/utilities/CollListMetadata.cpp b/Database/APR/CollectionUtilities/utilities/CollListMetadata.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9811a5105233e6fbb0117223261ed859714d9e14 --- /dev/null +++ b/Database/APR/CollectionUtilities/utilities/CollListMetadata.cpp @@ -0,0 +1,284 @@ +/* + Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration +*/ + +/** + * @file CollListMetadata.cpp + * @brief Utility to print metadata from a POOL collection + * @author Marcin.Nowak@cern.ch + */ +#include "CollectionBase/ICollectionDescription.h" +#include "CollectionBase/CollectionService.h" +#include "CollectionBase/ICollection.h" +#include "CollectionBase/ICollectionMetadata.h" + +#include "FileCatalog/IFileCatalog.h" +#include "FileCatalog/IFCAction.h" + +#include "CoralBase/MessageStream.h" +#include "POOLCore/Exception.h" + +#include "CollectionUtilities/Args2Container.h" +#include "CollectionUtilities/SrcInfo.h" +#include "CollectionUtilities/CatalogInfo.h" +#include "CollectionUtilities/CollectionMetadataParser.h" + +#include <xercesc/dom/DOM.hpp> +#include <xercesc/dom/DOMImplementation.hpp> +#include <xercesc/dom/DOMImplementationLS.hpp> +#if XERCES_VERSION_MAJOR < 3 +#include <xercesc/dom/DOMWriter.hpp> +#else +#include <xercesc/dom/DOMLSSerializer.hpp> +#endif +#include <xercesc/dom/DOMNode.hpp> +#include <xercesc/parsers/XercesDOMParser.hpp> + +#include <xercesc/sax/SAXException.hpp> + +#include <xercesc/framework/StdOutFormatTarget.hpp> +#include <xercesc/framework/LocalFileFormatTarget.hpp> +#include <xercesc/framework/StdOutFormatTarget.hpp> +#include <xercesc/framework/LocalFileFormatTarget.hpp> + +#include <xercesc/util/XMLUni.hpp> +#include <xercesc/util/PlatformUtils.hpp> +#include <xercesc/util/XMLUni.hpp> +#include <xercesc/util/OutOfMemoryException.hpp> +#include <xercesc/util/PlatformUtils.hpp> +#include <xercesc/util/TransService.hpp> 
+ +XERCES_CPP_NAMESPACE_USE + +#include <iostream> +using namespace std; +using namespace pool; + +int main(int argc, const char *argv[]) +{ + string thisProgram("CollListMetadata"); + string file("CollMetadata.xml"); + + try + { + coral::MessageStream log( thisProgram ); + pool::CollectionService collectionService; + + // vector of CmdLineArg objects + Args2Container argsVec(thisProgram, true, &log); + argsVec.desc << thisProgram + << " is a tool for listing metadata stored with POOL Collections." + << endl; + + QualList markers; + markers["-xmlSummary"] = ArgQual("Produce CollMetadata.xml with metadata for all nodes", 0); + markers["-raw"] = ArgQual("Prints metadata without any parsing (forced ON right now)", 0); + markers["-stat"] = ArgQual("Prints metadata statistics only", 0); + markers["-stats"] = markers["-stat"]; + + CmdLineArgs2 cmdLineArgs; + cmdLineArgs.setArgQuals(markers); + argsVec.push_back(&cmdLineArgs); + + CatalogInfo catinfo; argsVec.push_back(&catinfo); + SrcInfo srcinfo; argsVec.push_back(&srcinfo); + + // Check that all cmd line args are valid + if( !argsVec.evalArgs(argc, argv) ) return 1; + if( !argsVec.checkValid() ) return 1; + + bool summary = cmdLineArgs.hasQual("-xmlSummary"); + bool raw = cmdLineArgs.hasQual("-raw"); + bool stats = cmdLineArgs.hasQual("-stat") || cmdLineArgs.hasQual("-stats"); + + catinfo.setCatalogs( &collectionService ); + + + std::map<int, pool::ICollection*> collMap; + std::map< std::string, int > pfnMap; + for( unsigned int i=0; i<srcinfo.nSrc(); i++ ) { + cout<< "Printing metadata from collection '" << srcinfo.name(i) + << "', type: " << srcinfo.type(i) + << endl; + + bool readOnly( true ); + pool::ICollection *collection = collectionService.handle( srcinfo.name(i), srcinfo.type(i), srcinfo.connect(), readOnly ); + + long long entries = collection->metadata().entries(); + cout << "Metadata entries: " << entries << endl; + + if( stats ) { + size_t keymax(0), keytot(0), valmax(0), valtot(0), valzero(0); + 
pool::ICollectionMetadata::const_iterator mdIter = collection->metadata().begin(); + while( mdIter != collection->metadata().end() ) { + size_t keylen = mdIter.key().length(); + size_t vallen = mdIter.value()? strlen(mdIter.value()) : 0; + if( keylen > keymax ) keymax = keylen; + keytot += keylen; + if( vallen > valmax ) valmax = vallen; + valtot += vallen; + if( vallen == 0 ) valzero++; + ++mdIter; + } + if( !entries ) { + cout << "No metadata found!" << endl; + } else { + cout << "Maximum key length : " << keymax <<", average: " << keytot/entries << endl; + cout << "Maximum value length: " << valmax <<", average: " << valtot/entries + << ", empty values count: " << valzero + << endl; + } + return 0; + } + + + if( raw ) { + pool::ICollectionMetadata::const_iterator mdIter = collection->metadata().begin(); + while( mdIter != collection->metadata().end() ) { + cout << " Metadata Key= " << mdIter.key(); + cout << ", Value= " << mdIter.value() << endl; + ++mdIter; + } + return 0; + } + + // send the metadata to the parser + CollectionMetadataParser parser; + parser.init(collection->metadata()); + // get a list of the nodes and iterate over the metadata for each node + for (unsigned int n = 0; n < parser.numLevels(); ++n) { + cout << endl << " >>> " << srcinfo.name(i) + << " provenance LEVEL: " << n << " <<<" + << endl << endl; + set<ProvNode> nodes = parser.listNodes(n); + //set<ProvNode> nodes = parser.listNodes(); + for (set<ProvNode>::const_iterator nodes_it = nodes.begin(); + nodes_it != nodes.end(); + ++nodes_it) { + set< pair<MetaKey,MetaValue> > data = parser.getUniqueMetaForNode(*nodes_it); + cout << "Metadata from Node " << *nodes_it << ":" << endl; + for (set< pair<MetaKey,MetaValue> >::const_iterator it = data.begin(); + it != data.end(); + ++it) { + cout << " Key=" << it->first << " Value=" << it->second << endl; + } + } + } + + if (summary) { + try { + const XMLCh ls_id [] = {chLatin_L, chLatin_S, chNull}; + DOMImplementation* 
impl(DOMImplementationRegistry::getDOMImplementation (ls_id)); + DOMDocumentType* CMTType = impl->createDocumentType(XMLString::transcode("CollMetadataTree"), + XMLString::transcode(""), + XMLString::transcode("") ); + DOMDocument* newDocument = impl->createDocument(0, XMLString::transcode("CollMetadataTree"), CMTType); + set<ProvNode> nodes = parser.listNodes(); + + for (set<ProvNode>::const_iterator nodes_it = nodes.begin(); + nodes_it != nodes.end(); + ++nodes_it) { + DOMElement* provbase = newDocument->createElement(XMLString::transcode("ProvNode")); + provbase->setAttribute(XMLString::transcode("name"), + XMLString::transcode((*nodes_it).c_str())); + newDocument->getDocumentElement()->appendChild(provbase); + set< pair<MetaKey,MetaValue> > metas = parser.getUniqueMetaForNode(*nodes_it); + set< pair<MetaKey,MetaValue> >::const_iterator metit = metas.begin(); + while(metit != metas.end()) { + DOMElement* meta_el = newDocument->createElement(XMLString::transcode("KeyValuePair")); + meta_el->setAttribute(XMLString::transcode("key"), + XMLString::transcode(metit->first.c_str())); + meta_el->setAttribute(XMLString::transcode("value"), + XMLString::transcode(metit->second.c_str())); + provbase->appendChild(meta_el); + ++metit; + } + } + +#if XERCES_VERSION_MAJOR < 3 + // get a writer instance + DOMWriter *theSerializer = ((DOMImplementationLS*)impl)->createDOMWriter(); + + if (theSerializer->canSetFeature(XMLUni::fgDOMWRTDiscardDefaultContent, true)) + theSerializer->setFeature(XMLUni::fgDOMWRTDiscardDefaultContent, true); + + if (theSerializer->canSetFeature(XMLUni::fgDOMWRTFormatPrettyPrint, true)) + theSerializer->setFeature(XMLUni::fgDOMWRTFormatPrettyPrint, true); + + if (theSerializer->canSetFeature(XMLUni::fgDOMWRTBOM, true)) + theSerializer->setFeature(XMLUni::fgDOMWRTBOM, true); + + // set a target as the file argument + //std::string file("CollMetadata.xml"); + std::cout << "About to write summary file " << file << std::endl; + XMLFormatTarget* myFormTarget 
= new LocalFileFormatTarget(file.c_str()); + + // write document to target + theSerializer->writeNode(myFormTarget, *newDocument); + + // clean up the mess + if (theSerializer!=NULL && theSerializer!=0) delete theSerializer; + if (myFormTarget!=NULL && myFormTarget!=0) delete myFormTarget; + if (newDocument!=NULL && newDocument!=0) delete newDocument; +#else + // get a writer instance + DOMLSSerializer *theSerializer = ((DOMImplementationLS*)impl)->createLSSerializer(); + + if (theSerializer->getDomConfig()->canSetParameter(XMLUni::fgDOMWRTDiscardDefaultContent, true)) + theSerializer->getDomConfig()->setParameter(XMLUni::fgDOMWRTDiscardDefaultContent, true); + + if (theSerializer->getDomConfig()->canSetParameter(XMLUni::fgDOMWRTFormatPrettyPrint, true)) + theSerializer->getDomConfig()->setParameter(XMLUni::fgDOMWRTFormatPrettyPrint, true); + + if (theSerializer->getDomConfig()->canSetParameter(XMLUni::fgDOMWRTBOM, true)) + theSerializer->getDomConfig()->setParameter(XMLUni::fgDOMWRTBOM, true); + + // set a target as the file argument + //std::string file("CollMetadata.xml"); + std::cout << "About to write summary file " << file << std::endl; + XMLFormatTarget* myFormTarget = new LocalFileFormatTarget(file.c_str()); + + // write document to target + // See http://xerces.apache.org/xerces-c/program-dom-3.html + DOMLSOutput* theOutput = ((DOMImplementationLS*)impl)->createLSOutput(); + theOutput->setByteStream(myFormTarget); + theSerializer->write(newDocument, theOutput); + + // clean up the mess + if (theSerializer!=NULL && theSerializer!=0) delete theSerializer; + if (theOutput!=NULL && theOutput!=0) delete theOutput; + if (myFormTarget!=NULL && myFormTarget!=0) delete myFormTarget; + if (newDocument!=NULL && newDocument!=0) delete newDocument; +#endif + } + catch (const SAXException& e) { + std::cout << "xml error: " << e.getMessage( ) << "\n"; + } + catch (const DOMException& e) { + std::cout << "xml error: " << e.getMessage( ) << "\n"; + } + } + } + + + log << 
coral::Info << "Finished listing metadata" << coral::MessageStream::endmsg; + return 0; + + } + catch( pool::Exception& poolException ) + { + std::cerr << "pool::Exception: " << poolException.what() << std::endl; + return 1; + } + catch( std::exception& exception ) + { + std::cerr << "std::Exception: " << exception.what() << std::endl; + return 1; + } + catch( ... ) + { + std::cerr << "Unknown exception. " << std::endl; + return 1; + } +} + diff --git a/Database/APR/CollectionUtilities/utilities/CollListPFN.cpp b/Database/APR/CollectionUtilities/utilities/CollListPFN.cpp new file mode 100755 index 0000000000000000000000000000000000000000..9d75aea43d5d1ea263cc7de6a29fd27ad3453558 --- /dev/null +++ b/Database/APR/CollectionUtilities/utilities/CollListPFN.cpp @@ -0,0 +1,153 @@ +/* + Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration +*/ + +/** + * @file CollListPFN.cpp + * @brief Utility to list the physical file names used by a POOL collection + * @author K. Karr <Kristo.Karr@cern.ch> + * $Id: CollListPFN.cpp 527111 2012-11-21 21:11:08Z gemmeren $ + */ + +#include "PersistentDataModel/Token.h" + +#include "CollectionBase/ICollectionDescription.h" +#include "CollectionBase/ICollectionColumn.h" +#include "CollectionBase/ICollectionQuery.h" +#include "CollectionBase/ICollectionCursor.h" +#include "CollectionBase/CollectionRowBuffer.h" +#include "CollectionBase/CollectionService.h" +#include "CollectionBase/ICollection.h" + +#include "FileCatalog/IFileCatalog.h" +#include "FileCatalog/IFCAction.h" +#include "POOLCore/Exception.h" + +#include "CoralBase/MessageStream.h" + +#include "CollectionUtilities/Args2Container.h" +#include "CollectionUtilities/SrcInfo.h" +#include "CollectionUtilities/QueryInfo.h" +#include "CollectionUtilities/CatalogInfo.h" + +#include <iostream> +using namespace std; +using namespace pool; + +int main(int argc, const char *argv[]) +{ + string thisProgram("CollListPFN"); + try + { + pool::CollectionService collectionService; 
+ coral::MessageStream log( thisProgram ); + + // vector of CmdLineArg objects + Args2Container argsVec(thisProgram, true, &log); + argsVec.desc << thisProgram + << " lists Physical File Names of POOL files referenced from the source event collection(s). " + << endl; + + CatalogInfo catinfo; argsVec.push_back(&catinfo); + QueryInfo queryinfo; argsVec.push_back(&queryinfo); + SrcInfo srcinfo; argsVec.push_back(&srcinfo); + + // Check that all cmd line args are valid + if( !argsVec.evalArgs(argc, argv) ) return 1; + if( !argsVec.checkValid() ) return 1; + + catinfo.setCatalogs( &collectionService ); + + pool::IFileCatalog FC; + catinfo.setFileCatalogs( &FC ); + FC.start(); + + std::map<int, pool::ICollection*> collMap; + std::map< string, string > guidToPfn; + std::map< std::string, int > pfnMap; + std::map< std::string, int > notFoundGuids; + for( unsigned int i=0; i<srcinfo.nSrc(); i++ ) { + bool readOnly( true ); + pool::ICollection *collection = collectionService.handle( srcinfo.name(i), srcinfo.type(i), srcinfo.connect(), readOnly, 0 ); + collMap[i] = collection; + + log << coral::Debug + << " Creating query for the collection" << coral::MessageStream::endmsg; + pool::ICollectionQuery *collQuery = collection->newQuery(); + collQuery->setCondition( queryinfo.query() ); + if( queryinfo.queryOptions().size() ) { + collQuery->addToOutputList( queryinfo.queryOptions() ); + } else { + collQuery->selectAllTokens(); + } + log << coral::Debug << " Executing the query " << coral::MessageStream::endmsg; + pool::ICollectionCursor& cursor = collQuery->execute(); + + while( cursor.next() ) { + std::string dbFileId = cursor.eventRef().dbID().toString(); + if( notFoundGuids.find( dbFileId ) != notFoundGuids.end() ) { + // "known" unknown guid + notFoundGuids[dbFileId]++; + } else { + if( guidToPfn.find( dbFileId ) != guidToPfn.end() ) { + pfnMap[ guidToPfn[dbFileId] ] ++; + } else { + // new GUID + std::string bestPFN; + std::string fileType; + pool::IFCAction action; + 
FC.setAction( action ); + action.lookupBestPFN( dbFileId, + pool::FileCatalog::READ, + pool::FileCatalog::SEQUENTIAL, + bestPFN, + fileType ); + if( bestPFN.size() ) { + // found a name + guidToPfn[ dbFileId ] = bestPFN; + pfnMap[ bestPFN ] = 1; + } else { + notFoundGuids[dbFileId] = 1; + } + } + } + } + } + if( notFoundGuids.size() ) { + log << coral::Warning << "Found " << notFoundGuids.size() + << " unresolved POOL file GUID(s)" << coral::MessageStream::endmsg; + std::map< std::string, int >::iterator mapIter; + for( mapIter = notFoundGuids.begin(); mapIter != notFoundGuids.end(); ++mapIter ) { + log<< coral::Debug << " unresolved GUID: " << mapIter->first << coral::MessageStream::endmsg; + } + } + if( !pfnMap.size() ) { + log << coral::Warning << "No PFNs found - is your POOL file catalog correctly specified?" << coral::MessageStream::endmsg; + } else { + log << coral::Info << "Found PFNs referenced from the collection(s): " << coral::MessageStream::endmsg; + std::map< std::string, int >::iterator mapIter; + for( mapIter = pfnMap.begin(); mapIter != pfnMap.end(); ++mapIter ) { + std::cout << mapIter->first << std::endl; + } + } + for( unsigned int i=0; i<srcinfo.nSrc(); i++ ) { + collMap[i]->close(); + } + return 0; + } + catch( pool::Exception& poolException ) + { + std::cerr << "pool::Exception: " << poolException.what() << std::endl; + return 1; + } + catch( std::exception& exception ) + { + std::cerr << "std::Exception: " << exception.what() << std::endl; + return 1; + } + catch( ... ) + { + std::cerr << "Unknown exception. 
" << std::endl; + return 1; + } +} diff --git a/Database/APR/CollectionUtilities/utilities/CollListToken.cpp b/Database/APR/CollectionUtilities/utilities/CollListToken.cpp new file mode 100755 index 0000000000000000000000000000000000000000..6deb540debcfbada91e7d654f71d87909341159d --- /dev/null +++ b/Database/APR/CollectionUtilities/utilities/CollListToken.cpp @@ -0,0 +1,161 @@ +/* + Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration +*/ + +/** + * @file CollListToken.cpp + * @brief Utility to list the event references of a POOL collection + * @author K. Karr <Kristo.Karr@cern.ch> + * @author Marcin.Nowak@cern.ch + * $Id: CollListToken.cpp 506773 2012-06-21 15:40:21Z gemmeren $ + */ + +#include "PersistentDataModel/Token.h" + +#include "CollectionBase/ICollection.h" +#include "CollectionBase/TokenList.h" +#include "CollectionBase/ICollectionDescription.h" +#include "CollectionBase/ICollectionColumn.h" +#include "CollectionBase/ICollectionQuery.h" +#include "CollectionBase/ICollectionCursor.h" +#include "CollectionBase/CollectionRowBuffer.h" +#include "CollectionBase/CollectionService.h" + +#include "CoralBase/MessageStream.h" +#include "POOLCore/Exception.h" + +#include "CollectionUtilities/Args2Container.h" +#include "CollectionUtilities/SrcInfo.h" +#include "CollectionUtilities/QueryInfo.h" +#include "CollectionUtilities/CatalogInfo.h" +#include "CollectionUtilities/MaxEventsInfo.h" + +#include <sstream> +#include <iostream> +using namespace std; +using namespace pool; +#define corENDL coral::MessageStream::endmsg + +int main(int argc, const char *argv[]) +{ + string thisProgram("CollListToken"); + try + { + coral::MessageStream log( thisProgram ); + pool::CollectionService collectionService; + + // vector of CmdLineArg objects + Args2Container argsVec(thisProgram, true, &log); + argsVec.desc << thisProgram + << " is a tool for querying an event collection (or collections), " + << "and displaying POOL Tokens contained in the query result set. 
" << endl + << "By default all rows are displayed" << endl; + + CatalogInfo catinfo; argsVec.push_back(&catinfo); + QueryInfo queryinfo; argsVec.push_back(&queryinfo); + SrcInfo srcinfo; argsVec.push_back(&srcinfo); + MaxEventsInfo maxEvents; argsVec.push_back(&maxEvents); + + // Check that all cmd line args are valid + if( !argsVec.evalArgs(argc, argv) ) return 1; + if( !argsVec.checkValid() ) return 1; + + catinfo.setCatalogs( &collectionService ); + + std::map<int, pool::ICollection*> collMap; + unsigned int collCounter = 0; + for ( unsigned int i=0; i<srcinfo.nSrc(); i++ ) + { + collCounter++; + + bool readOnly( true ); + pool::ICollection *collection = collectionService.handle( srcinfo.name(i), srcinfo.type(i), srcinfo.connect(), readOnly ); + collMap[i] = collection; + + const pool::ICollectionDescription &description = collection->description(); + + std::cout << std::endl; + std::cout << "Collection name: " << description.name() << std::endl; + std::cout << "Collection type: " << description.type() << std::endl; + std::cout << "Number of collection fragments: " << description.numberOfCollectionFragments() << std::endl; + std::cout << "Primary Reference Token name is: " << description.eventReferenceColumnName() << std::endl; + std::cout << "Number of tokens: " << description.numberOfTokenColumns() << std::endl; + std::cout << "Tokens are: " << std::endl; + + unsigned maxNameSize = 0; + for( int f=0; f < description.numberOfCollectionFragments(); f++ ) { + for( int c=0; c < description.numberOfTokenColumns( f ); c++ ) { + const pool::ICollectionColumn& column = description.tokenColumn(c, f); + if( maxNameSize < column.name().size() ) + maxNameSize = column.name().size(); + } + } + for( int f=0; f < description.numberOfCollectionFragments(); f++ ) { + for( int c=0; c < description.numberOfTokenColumns( f ); c++ ) { + const pool::ICollectionColumn& column = description.tokenColumn(c, f); + std::cout.setf(std::ios::left); + std::cout << "NAME: "; + 
std::cout.width(maxNameSize+5); + std::cout << column.name(); + std::cout << "INFO: "; + std::cout << column.annotation(); + std::cout << std::endl; + } + } + + log << coral::Debug << "Creating query for the collection" << corENDL; + pool::ICollectionQuery *collQuery = collection->newQuery(); + collQuery->setCondition( queryinfo.query() ); + if( queryinfo.queryOptions().size() ) { + collQuery->skipEventReference(); + collQuery->addToOutputList( queryinfo.queryOptions() ); + } else { + collQuery->selectAllTokens(); + } + log << coral::Debug << "Executing query: " << corENDL; + pool::ICollectionCursor& cursor = collQuery->execute(); + + int evtCounter = 0; + while( cursor.next() ) { + evtCounter++; + if( maxEvents.specified() && evtCounter > maxEvents.get() ) + break; + + std::cout << "Number of events counted: " << evtCounter << std::endl; + std::cout << "Tokens: " << std::endl; + + const pool::TokenList &tokens = cursor.currentRow().tokenList(); + for( pool::TokenList::const_iterator iter = tokens.begin(); + iter != tokens.end(); ++iter ) { + std::cout.setf(std::ios::left); + std::cout << "NAME: "; + std::cout.width(maxNameSize+5); + std::cout << iter.tokenName(); + std::cout << "VALUE: " << iter->toString(); + std::cout << std::endl; + } + } + } + + for( unsigned int i=0; i<<srcinfo.nSrc(); i++ ) { + collMap[i]->close(); + } + + return 0; + } + catch( pool::Exception& poolException ) + { + std::cerr << "pool::Exception: " << poolException.what() << std::endl;; + return 1; + } + catch( std::exception& exception ) + { + std::cerr << "std::exception: " << exception.what() << std::endl;; + return 1; + } + catch( ... ) + { + std::cerr << "Unknown exception caught in main()." 
<< std::endl;; + return 1; + } +} diff --git a/Database/APR/CollectionUtilities/utilities/CollQuery.cpp b/Database/APR/CollectionUtilities/utilities/CollQuery.cpp new file mode 100644 index 0000000000000000000000000000000000000000..4284cf50200e2532670b9fdfc8fd8dffc30f4cb6 --- /dev/null +++ b/Database/APR/CollectionUtilities/utilities/CollQuery.cpp @@ -0,0 +1,576 @@ +/* + Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration +*/ + +/** + * @file CollQuery.cpp + * @brief Utility to list the metadata of a POOL collection + * @author K. Karr <Kristo.Karr@cern.ch> + * @author Marcin.Nowak@cern.ch + * $Id: CollQuery.cpp 556226 2013-07-30 15:42:48Z mnowak $ + */ + +#include "PersistentDataModel/Token.h" + +#include "CollectionBase/ICollection.h" +#include "CollectionBase/ICollectionDescription.h" +#include "CollectionBase/ICollectionColumn.h" +#include "CollectionBase/ICollectionQuery.h" +#include "CollectionBase/ICollectionCursor.h" +#include "CollectionBase/CollectionRowBuffer.h" +#include "CollectionBase/ICollectionMetadata.h" +#include "CollectionBase/CollectionService.h" + +#include "POOLCore/Exception.h" + +#include "CoralBase/AttributeList.h" +#include "CoralBase/Attribute.h" +#include "CoralBase/MessageStream.h" + +#include "CollectionUtilities/Args2Container.h" +#include "CollectionUtilities/CmdLineArgs2.h" +#include "CollectionUtilities/ArgQual.h" +#include "CollectionUtilities/SrcInfo.h" +#include "CollectionUtilities/QueryInfo.h" +#include "CollectionUtilities/CatalogInfo.h" +#include "CollectionUtilities/MaxEventsInfo.h" + +#include <iostream> +#include <iomanip> +#include <memory> + + +// ------ copied the whole class here for quick adaptation. 
/**
 * Keyed description of an attribute column: its type name, unit and group.
 *
 * The three values are stored under the keys "TYPE", "UNIT" and "GRP".
 * info() serialises them as "TYPE=<t>;UNIT=<u>;GRP=<g>;" and fromString()
 * reads the same format back.
 */
class AttributeType
{
public:
  /// Store type @p t, unit @p u and group @p g (unit and group default to "UNKNOWN").
  AttributeType(std::string t, std::string u, std::string g);

  /// Serialise as "TYPE=<type>;UNIT=<unit>;GRP=<group>;".
  std::string info() const;

  void setType(std::string info)  {setInfoForKey("TYPE",info);}
  void setUnit(std::string info)  {setInfoForKey("UNIT",info);}
  void setGroup(std::string info) {setInfoForKey("GRP",info);}

  /// Replace the stored values with those found in @p info (format of info()).
  void fromString(std::string info);

  /// Stored value for "TYPE", or "NOTFOUND" when none is stored.
  std::string typeName() const;
  /// Stored value for "UNIT", or "NOTFOUND" when none is stored.
  std::string unit() const;
  /// Stored value for "GRP", or "NOTFOUND" when none is stored.
  std::string group() const;

  /// The set of keys this object accepts.
  std::set<std::string> getKeys() const {return m_keys;}

private:
  /// Set @p info for @p key; returns false (and stores nothing) for an unknown key.
  bool setInfoForKey(std::string key, std::string info);

  std::map<std::string,std::string> m_keyedInfo;
  std::set<std::string> m_keys;
};

inline
AttributeType::AttributeType(std::string t,
                             std::string u="UNKNOWN",
                             std::string g="UNKNOWN")
{
  m_keys.insert("TYPE");
  m_keys.insert("UNIT");
  m_keys.insert("GRP");
  m_keyedInfo.insert(std::make_pair("TYPE",t));
  m_keyedInfo.insert(std::make_pair("UNIT",u));
  m_keyedInfo.insert(std::make_pair("GRP",g));
}

inline
bool AttributeType::setInfoForKey(std::string key, std::string info)
{
  // Only the keys registered in m_keys may be stored
  if ( m_keys.find(key) == m_keys.end() ) return false;

  std::map<std::string,std::string>::iterator i = m_keyedInfo.find(key);
  if (i != m_keyedInfo.end()) i->second=info;
  else m_keyedInfo.insert(std::make_pair(key,info));
  return true;
}

inline void
AttributeType::fromString(std::string i)
{
  // Forget the old values but keep m_keys intact: the previous version
  // cleared m_keys as well, which made every setInfoForKey() call below
  // fail, so nothing was ever stored.
  m_keyedInfo.clear();

  for (std::set<std::string>::const_iterator it = m_keys.begin(); it != m_keys.end(); ++it) {
    // Look for "KEY=" and take the text up to (not including) the next ';'
    const std::string flag = *it + "=";
    const std::string::size_type flagpos = i.find(flag);
    if (flagpos == std::string::npos) continue;

    const std::string::size_type valpos = flagpos + flag.size();
    const std::string::size_type endpos = i.find(';', valpos);
    if (endpos == std::string::npos) continue;

    // substr takes (position, length); the old code passed the end position
    // as the length and started at the key name instead of the value.
    setInfoForKey(*it, i.substr(valpos, endpos - valpos));
  }
}

inline
std::string AttributeType::typeName() const
{
  std::map<std::string,std::string>::const_iterator temp = m_keyedInfo.find("TYPE");
  if (temp!=m_keyedInfo.end()) return temp->second;
  else return std::string("NOTFOUND");
}

inline
std::string AttributeType::unit() const
{
  std::map<std::string,std::string>::const_iterator temp = m_keyedInfo.find("UNIT");
  if (temp!=m_keyedInfo.end()) return temp->second;
  else return std::string("NOTFOUND");
}

inline
std::string AttributeType::group() const
{
  std::map<std::string,std::string>::const_iterator temp = m_keyedInfo.find("GRP");
  if (temp!=m_keyedInfo.end()) return temp->second;
  else return std::string("NOTFOUND");
}

inline
std::string AttributeType::info() const
{
  std::ostringstream os;
  os << "TYPE="<<typeName()<<";"<<"UNIT="<<unit()<<";"<<"GRP="<<group()<<";";
  return os.str();
}
" << endl + << " no PK (DEFAULT)" << endl + << " Other valid choices: " << endl + << " (1) ATLAS = (run,event,lumiblock)"; + markers["-pkrange"].desc << "NOT CURRENTLY IMPLEMENTED " << endl; + markers["-pkrange"].desc << "First and last primary keys to be considered " << endl + << "listed as tuple pair (val1,val2,...) appropriate for pktype chosen. " + << "NO SPACES allowed!" + << "(DEFAULT = all) "; + markers["-nevents"].desc << "Number of events starting from begin of pkrange for " + << "which to how results. (DEFAULT = all) "; + markers["-nevtperprint"].desc << "Number of events to process before " + << "each print of status to the screen. (DEFAULT=200)"; +/* + markers["-quiet"].desc << "Qualifier that takes no arguments and specifies " + << "that no information except the (optional) event dump and " + << "event scan status information is to be printed to screen; " + << "Useful for performance testing"; +*/ + + CmdLineArgs2 cmdLineArgs; + cmdLineArgs.setArgQuals(markers); + argsVec.push_back(&cmdLineArgs); // Add it to the list + + // Classes with shared cli keys and their argument properties + // Add them to the list + CatalogInfo catinfo; argsVec.push_back(&catinfo); + QueryInfo queryinfo; argsVec.push_back(&queryinfo); + SrcInfo srcinfo; argsVec.push_back(&srcinfo); + MaxEventsInfo maxEvents; argsVec.push_back(&maxEvents); + + // Check that all cmd line args are valid + if( !argsVec.evalArgs(argc, argv) ) return 1; + if( !argsVec.checkValid() ) return 1; + + // Fill appropriate vectors based on CollQuery *specific* cmdLineArgs + unsigned int numEventsPerPrint = 0; + unsigned int colWidth = 16, idWidth=24; + bool noPrint = false; + string val; + bool countOnly = cmdLineArgs.hasQual("-count"); + if( cmdLineArgs.getOpt("-colwidth", val) ) { + colWidth = atoi( val.c_str() ); + } + if( cmdLineArgs.hasQual("-pktype") ) { + cerr << "WARNING: -pktype not implemented" << endl; + } + if( cmdLineArgs.hasQual("-pkrange") ) { + cerr << "WARNING: -pkrange not implemented" 
<< endl; + } + if( cmdLineArgs.getOpt("-nevtperprint", val) ) { + numEventsPerPrint = atoi( val.c_str() ); + } + + + catinfo.setCatalogs( &collectionService ); + + unsigned int collCounter = 0; + unsigned int evtCounterTotal = 0; + + std::map<std::string, int> CollCnts; + std::map<std::string, double> AttrSumMap; + std::map<std::string, double> AttrMaxMap; + std::map<std::string, double> AttrMinMap; + + std::map<int, pool::ICollection*> collMap; + unsigned int nAttrib = 0; + std::vector<std::string> attribNames; + std::vector<unsigned int> widths; + for( unsigned int i=0; i<srcinfo.nSrc(); i++ ) { + bool readOnly( true ); + pool::ICollection* collection = collectionService.handle( srcinfo.name(i), + srcinfo.type(i), + srcinfo.connect(), + readOnly ); + collMap[i] = collection; + collCounter++; + + const pool::ICollectionDescription &description = collection->description(); + unsigned int maxNameSize = 0; + unsigned int maxTypeNameSize = 0; + for( int f=0; f < description.numberOfCollectionFragments(); f++ ) { + for( int c=0; c < description.numberOfAttributeColumns( f ); c++ ) { + const pool::ICollectionColumn& column = description.attributeColumn(c, f); + if( maxNameSize < column.name().size() ) + maxNameSize = column.name().size(); + if( maxTypeNameSize < column.type().size() ) + maxTypeNameSize = column.type().size(); + } + } + +/* + std::cout << std::endl; + std::cout << "Collection name: " << description.name() << std::endl; + std::cout << "Collection type: " << description.type() << std::endl; + std::cout << "Number of collection fragments: " + << description.numberOfCollectionFragments() << std::endl; + std::cout << "Number of tokens is: " + << description.numberOfTokenColumns() << std::endl; + std::cout << "Number of attributes is: " + << description.numberOfAttributeColumns() << std::endl; +*/ + auto_ptr<pool::ICollectionQuery> collQuery( collection->newQuery() ); + if (queryinfo.hasQual("-query")) { + log << coral::Debug << ">> Creating query for the 
collection " << coral::MessageStream::endmsg; + } + collQuery->setCondition( queryinfo.query(i) ); + // set queryoptions + if( queryinfo.queryOptions().size() ) { + collQuery->addToOutputList( queryinfo.queryOptions() ); + } + else { + collQuery->selectAllAttributes(); + } + log << coral::Debug << ">> Executing the query " << coral::MessageStream::endmsg; + pool::ICollectionCursor& cursor = collQuery->execute(); + + int evtCounter = 0; + + std::string attribName = ""; + std::string attribTypeName = ""; + log << coral::Debug << ">> Iterating over query results " << coral::MessageStream::endmsg; + while( cursor.next() ) { + if( maxEvents.specified() && int(evtCounterTotal) >= maxEvents.get() ) + break; + + evtCounter++; + evtCounterTotal++; + + if( numEventsPerPrint>0 && !(evtCounterTotal % numEventsPerPrint) ) { + log << coral::Info << "Rows processed: " << evtCounterTotal << coral::MessageStream::endmsg; + } + const coral::AttributeList &attribList = cursor.currentRow().attributeList(); + + if (evtCounterTotal==1) { + std::cout << std::endl; + //std::cout << "Collection name: " << srcinfo.name(i) << std::endl; + //std::cout << "Collection type: " << srcinfo.type(i) << std::endl; + + std::cout << std::endl; + std::vector<std::string> names,types,units,groups; + for ( coral::AttributeList::const_iterator iter = attribList.begin(); + iter != attribList.end(); ++iter ) + { + attribName = iter->specification().name(); + attribTypeName = iter->specification().typeName(); + AttributeType attribAAT(attribTypeName); + attribAAT.fromString(collection->description().column(attribName).annotation()); + // Find widest piece + unsigned int width = max(attribName.size(),attribTypeName.size()); + width = max( width, (unsigned int)attribAAT.unit().size() ); + width = max( width, (unsigned int)attribAAT.group().size() ); + names.push_back(attribName); + types.push_back(attribTypeName); + units.push_back(attribAAT.unit()); + groups.push_back(attribAAT.group()); + 
//widths.push_back(width); + widths.push_back(colWidth); + ++nAttrib; + } +/* + std::cout << std::endl; + std::cout.width(idWidth); + std::cout << "GROUP: "; + for (unsigned int i=0; i<nAttrib; ++i) { + std::cout.width(colWidth); + if (groups[i].size()<colWidth) { + std::cout << groups[i]; + } + else { + std::string cname2(groups[i].substr(0,max(2,((int)colWidth-2)/2))); + cname2 += ".."; + cname2 += groups[i].substr(groups[i].size()-max(2,((int)colWidth-2)/2)); + cout << cname2; + } + } +*/ + if (!countOnly) { + std::cout << std::endl; + std::cout.width(idWidth); + std::cout << "NAME: "; + for (unsigned int i=0; i<nAttrib; ++i) { + std::cout.width(colWidth); + if (names[i].size()<colWidth) { + std::cout << names[i]; + } + else { + std::string cname2(names[i].substr(0,max(2,((int)colWidth-2)/2))); + cname2 += ".."; + cname2 += names[i].substr(names[i].size()-max(2,((int)colWidth-2)/2)); + cout << cname2; + } + } + + std::cout << std::endl; + std::cout.width(idWidth); + std::cout << "TYPE: "; + for (unsigned int i=0; i<nAttrib; ++i) { + std::cout.width(colWidth); + if (types[i].size()<colWidth) { + std::cout << types[i]; + } + else { + std::string cname2(types[i].substr(0,max(2,((int)colWidth-2)/2))); + cname2 += ".."; + cname2 += types[i].substr(types[i].size()-max(2,((int)colWidth-2)/2)); + cout << cname2; + } + } + + /* + std::cout << std::endl; + std::cout.width(idWidth); + std::cout << "UNIT: "; + //unsigned int totwidth=0; + for (unsigned int i=0; i<nAttrib; ++i) { + std::cout.width(colWidth); + if (units[i].size()<colWidth) { + std::cout << units[i]; + } + else { + std::string cname2(units[i].substr(0,max(2,((int)colWidth-2)/2))); + cname2 += ".."; + cname2 += units[i].substr(units[i].size()-max(2,((int)colWidth-2)/2)); + cout << cname2; + } + //totwidth += (2+widths[i]);` + } + */ + std::cout << std::endl; + std::string linesep; + for (unsigned int i=0; i<idWidth+nAttrib*colWidth; ++i) linesep += "-"; + std::cout << linesep; + } + cout << endl; + } + +/* + 
for ( coral::AttributeList::const_iterator iter = attribList.begin(); + iter != attribList.end(); ++iter ) +*/ + if( !maxEvents.specified() || evtCounter <= maxEvents.get() ) + { + std::cout.width(idWidth); + std::string cname(collection->description().name()); + if (cname.size()<idWidth) { + if (!countOnly) std::cout << cname; + } + else { + std::string cname2(cname.substr(0,max(2,((int)idWidth-2)/2))); + cname2 += ".."; + cname2 += cname.substr(cname.size()-max(2,((int)idWidth-2)/2)); + if (!countOnly) cout << cname2; + } + + for (unsigned int j=0; j<nAttrib; ++j) + { + //attribName = iter->specification().name(); + //attribTypeName = iter->specification().typeName(); + attribName = attribList[j].specification().name(); + attribTypeName = attribList[j].specification().typeName(); + attribNames.push_back(attribName); + + std::cout.width(widths[j]); + ostringstream lots; + attribList[j].toOutputStream(lots); + std::string value = lots.str().substr(lots.str().find(":")+1,std::string::npos); + std::cout.width(widths[j]); + if (!countOnly) std::cout << value; + + double val(0); + if( attribTypeName == "short" ) { + val = double(attribList[j].data<short>()); + } else if ( attribTypeName == "unsigned short" ) { + val = double(attribList[j].data<unsigned short>()); + } else if ( attribTypeName == "int" ) { + val = double(attribList[j].data<int>()); + } else if ( attribTypeName == "unsigned int" ) { + val = double(attribList[j].data<unsigned int>()); + } else if ( attribTypeName == "long" ) { + val = double(attribList[j].data<long>()); + } else if ( attribTypeName == "unsigned long" ) { + val = double(attribList[j].data<unsigned long>()); + } else if ( attribTypeName == "float" ) { + val = double(attribList[j].data<float>()); + } else if ( attribTypeName == "double" ) { + val = double(attribList[j].data<double>()); + } + + if ( AttrSumMap.count(attribName) == 0 ) { + AttrMaxMap[attribName] = val; + AttrMinMap[attribName] = val; + } + AttrSumMap[attribName] += val; + if ( 
val > AttrMaxMap[attribName] ) + { + AttrMaxMap[attribName] = val; + } + if ( val < AttrMinMap[attribName] ) + { + AttrMinMap[attribName] = val; + } + } + if (!countOnly) std::cout << endl; + } + + } + + CollCnts.insert(make_pair(srcinfo.name(i),evtCounter)); + std::cout << std::endl; + } + + std::string linesep; + for (unsigned int i=0; i<idWidth+nAttrib*colWidth; ++i) linesep += "-"; + std::cout << linesep << std::endl; + + std::cout.width(idWidth); + std::cout << "NAME: "; + //unsigned int totwidth=0; + for (unsigned int i=0; i<nAttrib; ++i) { + std::cout.width(colWidth); + std::cout << attribNames[i]; + } + std::cout << std::endl; + std::cout.width(idWidth); + std::cout << "MAX: "; + //unsigned int totwidth=0; + for (unsigned int i=0; i<nAttrib; ++i) { + std::cout.width(colWidth); + std::cout << AttrMaxMap[attribNames[i]]; + } + std::cout << std::endl; + std::cout.width(idWidth); + std::cout << "AVG: "; + //unsigned int totwidth=0; + for (unsigned int i=0; i<nAttrib; ++i) { + std::cout.width(colWidth); + std::cout << AttrSumMap[attribNames[i]]/double(evtCounterTotal); + } + std::cout << std::endl; + std::cout.width(idWidth); + std::cout << "MIN: "; + //unsigned int totwidth=0; + for (unsigned int i=0; i<nAttrib; ++i) { + std::cout.width(colWidth); + std::cout << AttrMinMap[attribNames[i]]; + } + std::cout << std::endl; + + std::cout << linesep << std::endl; + std::cout << "CONDITIONS = " << queryinfo.query() << std::endl; + std::cout << linesep << std::endl; + + if ( !noPrint ) + { + std::cout << std::endl; + std::cout << "Collections scanned: " << std::endl; + for (std::map<std::string,int>::iterator it = CollCnts.begin(); it != CollCnts.end(); ++it) { + std::cout.width(32); + std::cout << it->first; + std::cout.width(32); + std::cout << it->second << std::endl; + } + std::cout << "Total number of events scanned in all collections is: " + << evtCounterTotal << std::endl; + std::cout << std::endl; + } + + for( unsigned int i=0; i < srcinfo.nSrc(); i++ ) { + 
collMap[i]->close(); + } + + return 0; + } + catch( pool::Exception& poolException ) + { + std::cerr << "pool::Exception: " << poolException.what() << std::endl;; + return 1; + } + catch( std::exception& exception ) + { + std::cerr << "std::exception: " << exception.what() << std::endl;; + return 1; + } + catch( ... ) + { + std::cerr << "Unknown exception caught in main()." << std::endl;; + return 1; + } +} diff --git a/Database/APR/CollectionUtilities/utilities/CollRemove.cpp b/Database/APR/CollectionUtilities/utilities/CollRemove.cpp new file mode 100644 index 0000000000000000000000000000000000000000..0d558443c78c4e151c79a34161233d9278dd5800 --- /dev/null +++ b/Database/APR/CollectionUtilities/utilities/CollRemove.cpp @@ -0,0 +1,101 @@ +/* + Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration +*/ + +/** + * @file CollRemove.cpp + * @brief Utility to delete a POOL collection + * @author Marcin.Nowak@cern.ch + */ +#include "CollectionBase/ICollectionDescription.h" +#include "CollectionBase/CollectionService.h" +#include "CollectionBase/ICollection.h" + +#include "FileCatalog/IFileCatalog.h" +#include "FileCatalog/IFCAction.h" + +#include "CoralBase/MessageStream.h" +#include "POOLCore/Exception.h" + +#include "CollectionUtilities/Args2Container.h" +#include "CollectionUtilities/SrcInfo.h" +#include "CollectionUtilities/CatalogInfo.h" + +#include <iostream> +using namespace std; +using namespace pool; + + +int main(int argc, const char *argv[]) +{ + string thisProgram("CollRemove"); + try + { + coral::MessageStream log( thisProgram ); + pool::CollectionService collectionService; + + // vector of CmdLineArg objects + Args2Container argsVec(thisProgram, true, &log); + argsVec.desc << thisProgram + << " is a tool for deleting Relational POOL Collections. 
" + << endl; + + CatalogInfo catinfo; argsVec.push_back(&catinfo); + SrcInfo srcinfo; argsVec.push_back(&srcinfo); + + // Check that all cmd line args are valid + if( !argsVec.evalArgs(argc, argv) ) return 1; + if( !argsVec.checkValid() ) return 1; + + catinfo.setCatalogs( &collectionService ); + + std::map<int, pool::ICollection*> collMap; + std::map< std::string, int > pfnMap; + for( unsigned int i=0; i<srcinfo.nSrc(); i++ ) { + if( srcinfo.type(i)=="RootCollection" ) { + log << coral::Error + << "This tool only supports deleting Relational Collections" + << coral::MessageStream::endmsg; + return 2; + } + log << coral::Info + << "Removing collection '" << srcinfo.name(i) + << "', type: " << srcinfo.type(i) + << coral::MessageStream::endmsg; + + bool deleteChildFragments = true; + bool ignoreExternalDependencies = true; // ?? MN + bool result = collectionService.drop( srcinfo.name(i), srcinfo.type(i), srcinfo.connect(), deleteChildFragments, ignoreExternalDependencies); + if( !result ) { + log << coral::Error + << "Problems encountered when removing collection '" << srcinfo.name(i) + << "', type: " << srcinfo.type(i) + << coral::MessageStream::endmsg; + } + } + + log << coral::Info << "Collection(s) deleted" << coral::MessageStream::endmsg; + return 0; + + } + catch( pool::Exception& poolException ) + { + std::cerr << "pool::Exception: " << poolException.what() << std::endl; + return 1; + } + catch( std::exception& exception ) + { + std::cerr << "std::Exception: " << exception.what() << std::endl; + return 1; + } + catch( ... ) + { + std::cerr << "Unknown exception. 
" << std::endl; + return 1; + } +} + + + + + diff --git a/Database/APR/CollectionUtilities/utilities/CollSplitByGUID.cpp b/Database/APR/CollectionUtilities/utilities/CollSplitByGUID.cpp new file mode 100755 index 0000000000000000000000000000000000000000..e302e3f03f151f9d8145bc372fb954e3589191fb --- /dev/null +++ b/Database/APR/CollectionUtilities/utilities/CollSplitByGUID.cpp @@ -0,0 +1,34 @@ +/* + Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration +*/ + +/** + * @file CollSplitByGUID.cpp + * @brief Utility to list the file GUIDs used by a POOL collection and split the + * collection into sub-collections by GUID + * @author Marcin.Nowak@cern.ch + * $Id: CollSplitByGUID.cpp 458059 2011-09-09 13:52:47Z mnowak $ + */ + +#include "CollectionUtilities/CollSplitByGUIDBase.h" + +#include <iostream> +#include <stdexcept> + +using namespace std; +using namespace pool; + +#define corENDL coral::MessageStream::endmsg + + +int main(int argc, const char *argv[]) +{ + // Convert argv to vector of strings + vector<string> argv_v; + for( int i=0; i<argc; ++ i ) + argv_v.push_back( argv[i] ); + + CollSplitByGUIDBase worker; + return worker.execute( argv_v ); +} + diff --git a/Database/APR/CollectionUtilities/utilities/TokenExtractor.cpp b/Database/APR/CollectionUtilities/utilities/TokenExtractor.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9f2323958dd9f3e24f0d4135e3023514e07e821b --- /dev/null +++ b/Database/APR/CollectionUtilities/utilities/TokenExtractor.cpp @@ -0,0 +1,419 @@ +/* + Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration +*/ + +/** + * @file TokenExtractor.cpp + * @brief Utility to read the event references of a POOL collection and send POOL tokens over a Yampl channel + */ + +#include "PersistentDataModel/Token.h" + +#include "CollectionBase/ICollection.h" +#include "CollectionBase/TokenList.h" +#include "CollectionBase/ICollectionQuery.h" +#include "CollectionBase/ICollectionCursor.h" +#include 
typedef std::vector<std::string> TokenVector;              // Tokens in the POOL file ordered by OID (position in file)
typedef std::map<std::string,TokenVector> MapTokensByGuid; // Token lists by POOL File GUID

// ________________ CLI ________________
/// Parsed command-line options of TokenExtractor.
struct CLI {
  bool help;      ///< -h|--help given
  bool useEI;     ///< -e|--useEI given
  bool verbose;   ///< -v|--verbose given
  char* source;   ///< argument of -s|--source, 0 when absent
  char* yampl;    ///< argument of -y|--yampl, 0 when absent
};

/**
 * Parse the command line with getopt_long() into @p cli.
 *
 * Only the fields of options that are present are written, so @p cli must be
 * initialised by the caller.
 *
 * @return 0 on success; 1 on an unknown option, on leftover non-option
 *         arguments, or when -s is missing and help was not requested
 */
int parseCLI(int argc, char** argv, CLI& cli)
{
  int opt;
  int option_index(0);

  struct option long_options[] = {
    {"source", required_argument, 0, 's'}
    , {"yampl", required_argument, 0, 'y'}
    , {"useEI", no_argument, 0, 'e'}
    , {"verbose", no_argument, 0, 'v'}
    , {"help", no_argument, 0, 'h'}
    , {0, 0, 0, 0}
  };

  while((opt=getopt_long(argc,argv,"hves:y:",long_options,&option_index))!=-1) {
    switch(opt) {
    case 'h':
      cli.help = true;
      break;
    case 'e':
      cli.useEI = true;
      break;
    case 'v':
      cli.verbose = true;
      break;
    case 's':
      cli.source = optarg;
      break;
    case 'y':
      cli.yampl = optarg;
      break;
    case '?':
      // getopt_long has already reported the problem
      return 1;
    default:
      std::cerr << "ERROR: Unexpected error while parsing command-line options" << std::endl;
      return 1;
    }
  }

  if(optind<argc) {
    std::cerr << "ERROR: Unexpected non-option command-line arguments: " << std::endl;
    while(optind<argc) std::cerr << '\t' << argv[optind++] << std::endl;
    return 1;
  }

  if(cli.source==0 && !cli.help) {
    std::cerr << "ERROR: -s is a required option!" << std::endl;
    return 1;
  }

  return 0;
}

/// Print the command-line synopsis to stdout.
void printUsage()
{
  std::cout << std::endl << "Usage:" << std::endl;
  std::cout << "\tTokenExtractor [Options] -s|--source <source>" << std::endl << std::endl;
  std::cout << std::endl << "Required parameter:" << std::endl;
  std::cout << "\t-s|--source : Source for reading POOL Tokens. Either an ASCII file with POOL GUID to TAG file mappings, or an URL to Event Index" << std::endl;
  std::cout << "Options:" << std::endl;
  std::cout << "\t-e|--useEI : Use Event Index for reading POOL Tokens (default is to use TAG files)" << std::endl;
  std::cout << "\t-y|--yampl <channel_name> : The name of Yampl Channel for communicating with Token Clients (default is TokenExtractorChannel)" << std::endl;
  std::cout << "\t-v|--verbose : Run in verbose mode" << std::endl;
  std::cout << "\t-h|--help : Print usage and exit" << std::endl << std::endl;
}

/// Print the effective configuration to stdout; expects cli.source to be set.
void printConfiguration(const CLI& cli)
{
  std::cout << std::endl << "***** Token Extractor Configuration:" << std::endl;
  std::cout << "* Read tokens from " << (cli.useEI?"Event Index":"TAG") << std::endl;
  std::cout << "* Source: " << cli.source << std::endl;
  std::cout << "* Yampl Channel: " << (cli.yampl?cli.yampl:"TokenExtractorChannel") << std::endl;
  std::cout << "* Verbose: " << (cli.verbose?"Yes":"No") << std::endl;
  std::cout << "**********************" << std::endl << std::endl;
}
// ________________ CLI ________________

// _______________ curl ________________
/// Growable byte buffer filled by WriteCallback(); buf is malloc-family memory.
struct TokenBuf
{
  void* buf;
  size_t size;
};

/**
 * Write callback handed to curl via CURLOPT_WRITEFUNCTION: appends the
 * received chunk to the TokenBuf passed through @p data.
 *
 * @param ptr   received data
 * @param size  size of one element
 * @param nmemb number of elements
 * @param data  pointer to the TokenBuf to append to
 * @return number of bytes consumed (size*nmemb), or 0 when the buffer could
 *         not be grown; the buffer accumulated so far stays valid in that case
 */
size_t WriteCallback(void *ptr, size_t size, size_t nmemb, void *data)
{
  TokenBuf* tokenbuf = static_cast<TokenBuf*>(data);

  // The chunk is size*nmemb bytes; the old code grew and copied by nmemb
  // only while reporting size*nmemb as consumed.
  const size_t nbytes = size * nmemb;
  if(nbytes==0) return 0;

  // Keep the old pointer until realloc is known to have succeeded
  void* grown = realloc(tokenbuf->buf,tokenbuf->size+nbytes);
  if(grown==0) return 0;

  tokenbuf->buf = grown;
  memcpy(static_cast<char*>(tokenbuf->buf) + tokenbuf->size,ptr,nbytes);
  tokenbuf->size += nbytes;

  return nbytes;
}
// _______________ curl ________________
, bool verbose + , char* source + , const std::string& guid + , MapTokensByGuid& tokensByGuid) +{ + MapTokensByGuid::const_iterator retVal(tokensByGuid.end()); + if(useEI) { + // Read tokens from the Event Index web service using curl + std::cout << "Reading tokens from Event Index" << std::endl; + + // Compose the URL + // NB!!! Here we assume that the source looks like this: 'https://aiatlas016.cern.ch/EIHadoop/ES.jsp?query=path:EICache/atlevind/2014_6_31_13_53_7_893&guid=' + // And we want to make it look like that: 'https://aiatlas016.cern.ch/EIHadoop/ES.jsp?query=path:EICache/atlevind/2014_6_31_13_53_7_893&guid=4E5EDBC7-5F6D-E111-B20A-003048F3524E' + std::string url(source); + url += guid; + + if(verbose) std::cout << "URL :" << url << std::endl; + + // Initialize curl + CURL *curl; + CURLcode res; + struct TokenBuf tokenBuf; + tokenBuf.buf = 0; + tokenBuf.size = 0; + + curl = curl_easy_init(); + if(curl) { + curl_easy_setopt(curl, CURLOPT_URL, url.c_str()); // "http://wn181.ific.uv.es:8080/getIndex.jsp?guid=4E5EDBC7-5F6D-E111-B20A-003048F3524E&format=txt2" + curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteCallback); + curl_easy_setopt(curl, CURLOPT_WRITEDATA, &tokenBuf); + curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L); + curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0L); + + // Perform the request, res will get the return code + res = curl_easy_perform(curl); + // Check for errors + if(res != CURLE_OK) { + std::cerr << "curl_easy_perform() failed! " << curl_easy_strerror(res) << std::endl; + return retVal; + } + else { + std::cout << "Token reading OK" << std::endl; + } + + // Always cleanup + curl_easy_cleanup(curl); + } + else { + std::cerr << "Unable to initialize curl!" 
<< std::endl; + return retVal; + } + + // As a result of this operation we have a buffer, which contains the tokens separated by \n + // We need to parse the buffer and store the tokens in a new entry of the tokensByGuid map + + // Add new entry to the tokensByGuid map + auto itTokensByGuid = tokensByGuid.insert(std::pair<std::string,TokenVector>(guid,TokenVector())).first; + TokenVector& newTokens = itTokensByGuid->second; + + // Fill it in + char* charbuf = (char*)tokenBuf.buf; + size_t startpos(0); + size_t i; + for(i=0; i<tokenBuf.size; ++i) { + if(charbuf[i]=='\n' || charbuf[i]=='\r') { + // Ignore empty lines + if(i!=startpos) { + std::string token(&(charbuf[startpos]),i-startpos); + // Trim the token + token.erase(token.find_last_not_of(" \t")+1); + token.erase(0,token.find_first_not_of(" \t")); + if(verbose) std::cout << "TOKEN : " << token << std::endl; + newTokens.push_back(token); + } + startpos = i+1; + // TO DO: this is error-prone! + } + } + + retVal = itTokensByGuid; + } + else { + // Read tokens from the local TAG file + std::cout << "Reading tokens from TAG file" << std::endl; + // First we need to open and ASCII file, which contains POOL GUID to TAG filename mapping + // The name of this ASCII file is provided by the source argument + // The expected format: <POOL file GUID>,<TAG file name> pairs, one per line + std::string tagFileName(""); + std::ifstream ifstr; + ifstr.open(source,std::ifstream::in); + if(ifstr.fail()) { + std::cerr << "Unable to open " << source << " for reading" << std::endl; + return retVal; + } + if(verbose) std::cout << "Source file : " << source << std::endl; + while(ifstr.good()) { + std::string line; + ifstr >> line; + if(line.empty()) continue; + size_t comapos = line.find(','); + if(comapos==std::string::npos || comapos==0 || comapos==line.size()-1 + || line.find(',',comapos+1)!=std::string::npos) { + std::cerr << "WARNING: Ignoring the line with wrong format in the input file: " << line << std::endl; + continue; + } + 
if(line.substr(0,comapos)==guid) { + // Found the GUID. Get the TAG file name + tagFileName = line.substr(comapos+1); + if(verbose) std::cout << "TAG file : " << tagFileName << std::endl; + break; + } + } // Reading the input ASCII file + ifstr.close(); + if(tagFileName.empty()) { + std::cerr << "Unable to find GUID=" << guid << " in the input file " << source << std::endl; + return retVal; + } + + MapTokensByGuid::iterator itTokensByGuid; + try { + // Open the TAG file and get all of its tokens + pool::CollectionService collectionService; + pool::ICollection *collection = collectionService.handle(tagFileName,"RootCollection","",true); + pool::ICollectionQuery *collQuery = collection->newQuery(); + collQuery->selectAllTokens(); + pool::ICollectionCursor& cursor = collQuery->execute(); + std::cout << "Token reading OK" << std::endl; + + // Add new entry to the tokensByGuid map + itTokensByGuid = tokensByGuid.insert(std::pair<std::string,TokenVector>(guid,TokenVector())).first; + TokenVector& newTokens = itTokensByGuid->second; + + while(cursor.next()) { + const pool::TokenList &tokens = cursor.currentRow().tokenList(); + if(tokens.begin()!=tokens.end()) { + // !!! NB !!! + // Here we assume that we work with EVGEN files ! + // This means, for each event there is only one POOL token. + // For other types of input files this algorithm needs to be revised + // !!! NB !!! + pool::TokenList::const_iterator iter = tokens.begin(); + newTokens.push_back(iter->toString()); + if(verbose) std::cout << "TOKEN : " << iter->toString() << std::endl; + } + } + collection->close(); + } + catch( pool::Exception& poolException ) { + std::cerr << "pool::Exception caught! 
" << poolException.what() << std::endl;; + return retVal; + } + + retVal = itTokensByGuid; + } + return retVal; +} + +int main(int argc, char *argv[]) +{ + // __________________ Parse command line _________________________ + CLI cli{false,false,false,0,0}; + + if(parseCLI(argc,argv,cli)) { + printUsage(); + return 1; + } + + if(cli.help) { + printUsage(); + return 0; + } + + printConfiguration(cli); + + // ______________ Create container for caching the retrieved Tokens _______________ + MapTokensByGuid tokensByGuid; + + // ______________ Construct the Server Socket and start waiting for requests ______________ + std::string socketName(cli.yampl?cli.yampl:"TokenExtractorChannel"); + yampl::ISocketFactory* socketFactory = new yampl::SocketFactory(); + yampl::ISocket* socket = socketFactory->createServerSocket(yampl::Channel(socketName,yampl::LOCAL_PIPE),yampl::MOVE_DATA); + + while(true) { + char *requestBuffer(0), *responseBuffer(0); + std::string strEvtTokens(""); + + // Empty message to be delivered to the requester in case of an error + void* empty_message = malloc(1); + + + std::cout << "\nWaiting for an incoming request ..." << std::endl; + ssize_t requestSize = socket->recv(requestBuffer); + std::cout << "Request received." << std::endl; + + if(requestSize==1) { // Empty request received. Break the loop + std::cout << "Empty request. Exiting ...." << std::endl; + break; + } + + if(requestSize==-1) { + std::cerr << "ERROR receiving a request over the yampl channel. 
Ignoring this request" << std::endl; + socket->send(empty_message,1); + continue; + } + + // Decode the request: GUID,evtNumber[,evtNumber] + std::string strRequest(requestBuffer,requestSize); + if(cli.verbose) std::cout << "Request: " << strRequest << std::endl; + size_t comapos = strRequest.find(','); + if(comapos==std::string::npos) { + std::cerr << "ERROR: Wrong format of the incoming request: " << strRequest << std::endl; + socket->send(empty_message,1); + continue; + } + + std::string guid = strRequest.substr(0,comapos); + std::cout << "GUID=" << guid << std::endl; + + // _____________ Locate GUID in the tokens map. If does not exist, then retrieve from the source __________________ + MapTokensByGuid::const_iterator guidIterator = tokensByGuid.find(guid); + if(guidIterator==tokensByGuid.end()) { + guidIterator = retrieveTokens(cli.useEI,cli.verbose,cli.source,guid,tokensByGuid); + if(guidIterator==tokensByGuid.end()) { + std::cerr << "ERROR: unable to retrieve event tokens for GUID=" << guid << std::endl; + socket->send(empty_message,1); + continue; + } + } + + const TokenVector& cachedTokens = guidIterator->second; + // ________________________ Parse the string with event numbers, get corresponding Tokens + // and store the Tokens in a coma-separated string strEvtTokens __________________________ + size_t startpos(comapos+1); + size_t endpos = strRequest.find(',',startpos); + unsigned evtPos(0); + while(endpos!=std::string::npos) { + evtPos = std::atoi(strRequest.substr(startpos,endpos-startpos).c_str()); + std::cout << "Event number = " << evtPos << std::endl; + startpos = endpos+1; + endpos = strRequest.find(',',startpos); + if(evtPos<=cachedTokens.size() && evtPos>0) { + if(!strEvtTokens.empty()) + strEvtTokens += std::string(","); + strEvtTokens += cachedTokens[evtPos-1]; + if(cli.verbose) std::cout << "Corresponding Token " << cachedTokens[evtPos-1] << std::endl; + } + else { + std::cerr << "ERROR: wrong event positional number received " << evtPos << 
std::endl; + socket->send(empty_message,1); + continue; + } + } + evtPos = std::atoi(strRequest.substr(startpos).c_str()); + std::cout << "Event number = " << evtPos << std::endl; + if(evtPos<=cachedTokens.size() && evtPos>0) { + if(!strEvtTokens.empty()) + strEvtTokens += std::string(","); + strEvtTokens += cachedTokens[evtPos-1]; + if(cli.verbose) std::cout << "Corresponding Token " << cachedTokens[evtPos-1] << std::endl; + } + else { + std::cerr << "ERROR: wrong event positional number received " << evtPos << std::endl; + socket->send(empty_message,1); + continue; + } + + if(cli.verbose) std::cout << "Response message: " << strEvtTokens << std::endl; + + // Construct a response buffer: evtToken[,evtToken] + responseBuffer = (char*)malloc(strEvtTokens.size()); + memcpy(responseBuffer,strEvtTokens.data(),strEvtTokens.size()); + socket->send(responseBuffer,strEvtTokens.size()); + std::cout << "Response sent" << std::endl; + + free(empty_message); + } + + delete socket; + return 0; +} diff --git a/Database/APR/CollectionUtilities/utilities/coll_insertGuidToCatalog.cpp b/Database/APR/CollectionUtilities/utilities/coll_insertGuidToCatalog.cpp new file mode 100644 index 0000000000000000000000000000000000000000..5223fdca9face8646207885e405697a5c3383a2c --- /dev/null +++ b/Database/APR/CollectionUtilities/utilities/coll_insertGuidToCatalog.cpp @@ -0,0 +1,147 @@ +/* + Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration +*/ + +#include <iostream> +#include <stdexcept> +#include <memory> + +#include "PersistentDataModel/Guid.h" +#include "FileCatalog/IFileCatalog.h" +#include "FileCatalog/FCLeaf.h" +#include "FileCatalog/FCImpl.h" +#include "FileCatalog/FCEntry.h" +#include "FileCatalog/URIParser.h" + +class InsertFileToCatalogApplication { +public: + InsertFileToCatalogApplication( int argc, char* argv[] ); + ~InsertFileToCatalogApplication(){} + bool parseArguments(); + void execute(); + void printSyntax(); +private: + const std::string executableName; 
+ std::string technologyName; + std::string fcURL; + std::vector< std::string > fileNames; + std::string guid; +}; + +InsertFileToCatalogApplication::InsertFileToCatalogApplication( int argc, char* argv[] ): + executableName( std::string( argv[0] ) ), + technologyName( "PoolCollection" ), + fcURL( "" ), + fileNames() +{ + for ( int i = 1; i < argc; ++i ) + fileNames.push_back( std::string( argv[i] ) ); +} + +bool +InsertFileToCatalogApplication::parseArguments() +{ + std::vector< std::string > theFiles; + std::vector< std::string > args = fileNames; + unsigned int excludedArgument = 0; + Guid dummy; + Guid::create(dummy); + guid = dummy.toString(); + for ( unsigned int iArg = 0; iArg < args.size(); ++iArg ) { + if ( iArg > 0 && iArg == excludedArgument ) + continue; + const std::string& arg = args[iArg]; + if ( arg == "-u" ) { + unsigned int nextArgumentIndex = iArg + 1; + if ( nextArgumentIndex < args.size() ) { + excludedArgument = nextArgumentIndex; + fcURL = args[nextArgumentIndex]; + } + } + else if ( arg == "-t" ) { + unsigned int nextArgumentIndex = iArg + 1; + if ( nextArgumentIndex < args.size() ) { + excludedArgument = nextArgumentIndex; + technologyName = args[nextArgumentIndex]; + } + } + else if ( arg == "-g" ) { + unsigned int nextArgumentIndex = iArg + 1; + if ( nextArgumentIndex < args.size() ) { + excludedArgument = nextArgumentIndex; + if (args[nextArgumentIndex].length()==36) guid = args[nextArgumentIndex]; + else std::cout << "-g argument does not match needed GUID length" << std::endl; + } + } + else { + theFiles.push_back( arg ); + } + } + + // Check now the values. + if ( theFiles.empty() ) return false; + if ( theFiles.size() > 1 ) { + std::cout << "Too many files, taking first only! 
" << std:: endl; + return false; + } + fileNames.clear(); + fileNames.push_back(theFiles[0]); + + + return true; +} + + +void +InsertFileToCatalogApplication::execute() +{ + // Do the job here + + bool allOK = true; + + // Open the file catalog and insert the pfn/fid/technology + if ( allOK ) { + pool::URIParser p( fcURL ); + p.parse(); + std::auto_ptr<pool::IFileCatalog> catalog( new pool::IFileCatalog ); + if ( ! catalog.get() ) + throw std::runtime_error( "Could not create a file catalog" ); + catalog->setWriteCatalog( p.contactstring() ); + catalog->connect(); + catalog->start(); + + pool::PFNEntry entry( fileNames[0], guid, technologyName ); + dynamic_cast< pool::FCLeaf* >( catalog->getWriteCatalog() )->getImpl()->insertPFN( entry ); + + catalog->commit(); + } +} + +void +InsertFileToCatalogApplication::printSyntax() +{ + std::cout << "Syntax : " << executableName << " [-u fileCatalog] [-t technologyType] [-g input guid] file" << std::endl; +} + + + + +int main( int argc, char* argv[] ) +{ + try { + InsertFileToCatalogApplication app( argc, argv ); + if ( app.parseArguments() ) { + app.execute(); + } + else app.printSyntax(); + } + catch ( std::exception& error ) { + std::cerr << error.what() << std::endl; + return 1; + } + catch ( ... ) { + std::cerr << "Funny error ..." << std::endl; + return 1; + } + return 0; +}