diff --git a/Database/FileStager/FileStager/FileStagerAlg.h b/Database/FileStager/FileStager/FileStagerAlg.h new file mode 100644 index 0000000000000000000000000000000000000000..cdbf501858833f32d0ba5588ddb55f28291035dc --- /dev/null +++ b/Database/FileStager/FileStager/FileStagerAlg.h @@ -0,0 +1,66 @@ +/* + Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration +*/ + +#ifndef FILESTAGERALG_H +#define FILESTAGERALG_H + +#include "GaudiKernel/Algorithm.h" +#include "GaudiKernel/IIncidentListener.h" + +#include <string> +#include <vector> + +class StoreGateSvc; +class TStopwatch; +class TH1D; + +///////////////////////////////////////////////////////////////////////////// + +class FileStagerAlg : public Algorithm, + public IIncidentListener { + +public: + FileStagerAlg( const std::string& name, ISvcLocator* pSvcLocator ); + ~FileStagerAlg(); + + StatusCode initialize(); + StatusCode execute(); + StatusCode finalize(); + + void handle(const Incident& inc); + +private: + + void configStager(); + void loadStager(); + void releasePrevFile(); + void setupNextFile(); + + int m_pipeLength; + bool m_verbose; + bool m_verboseWait; + bool m_firstFileAlreadyStaged; + std::string m_treeName; + std::string m_infilePrefix; + std::string m_outfilePrefix; + std::string m_cpCommand; + std::string m_baseTmpdir; + std::string m_logfileDir; + bool m_keepLogfiles; + bool m_storeStats; + std::vector< std::string > m_cpArg; + std::vector< std::string > m_inCollection; + std::vector< std::string > m_outCollection; + + int _numEventsInFile; + int _event; + std::vector< std::string >::iterator _fItr; + std::string _prevFile; + + TStopwatch* _stopwatch; + double _waittime; + TH1D* _waithist; +}; + +#endif // FILESTAGERALG_H diff --git a/Database/FileStager/FileStager/TCopyChain.h b/Database/FileStager/FileStager/TCopyChain.h new file mode 100644 index 0000000000000000000000000000000000000000..3888a43fb0fd0b99a093a0c63accb32aebd1dd21 --- /dev/null +++ b/Database/FileStager/FileStager/TCopyChain.h @@ -0,0 +1,51 @@ +// This file's extension implies that it's C, but it's really -*- C++ -*-. + +/* + Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration +*/ + +#ifndef TCOPYCHAIN_H +#define TCOPYCHAIN_H + +#include <TROOT.h> +#include "TChain.h" +#include "TString.h" + +class TFile; + +class TCopyChain : public TChain +{ + public: + TCopyChain(); + TCopyChain (const char* name, const char* title = ""); + virtual ~TCopyChain(); + + virtual Long64_t GetReadEntry() const; + virtual Long64_t GetEntries() const; + virtual Long64_t GetEntries(const char* sel); + virtual Long64_t LoadTree(Long64_t entry); + + virtual Int_t Add(TChain* chain) { return TChain::Add(chain); } + virtual Int_t Add(const char* name, Long64_t nentries = kBigNumber); + //virtual Int_t AddFile(const char* name, Long64_t nentries = kBigNumber, const char* tname = ""); + + static void SetOriginalTChain(bool originalTChain=true) { _originalTChain=originalTChain; } + virtual bool IsNewFile(){ return _isNewFile; } + virtual TString GetCurrentLocalFileName(); + + private: + static bool _originalTChain; + bool _releasedLastFile; + int _bigNumber; + + TFile* _curFile; //! 
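+  // (the "//!" marker above tells rootcint that _curFile is transient:
+  //  the pointer is not written out when a TCopyChain object is streamed)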
+ TString _prevFileName; + bool _isNewFile; + + ClassDef(TCopyChain,1) +}; + +//bool TCopyChain::_originalTChain=true; + +#endif + diff --git a/Database/FileStager/FileStager/TCopyFile.h b/Database/FileStager/FileStager/TCopyFile.h new file mode 100644 index 0000000000000000000000000000000000000000..92131db764049a06008af52567762b79eb054f9d --- /dev/null +++ b/Database/FileStager/FileStager/TCopyFile.h @@ -0,0 +1,47 @@ +/* + Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration +*/ + +#ifndef __TGRIDCOPYFILE_HH__ +#define __TGRIDCOPYFILE_HH__ + +#include <iostream> +#include <TROOT.h> +#include <TFile.h> +#include <string> + +class TCopyFile : public TFile +{ + public: + TCopyFile(); + TCopyFile(const char *fname, Option_t *option="", const char *ftitle="", Int_t compress=1); + virtual ~TCopyFile(); + + virtual const char* GetName() const { return _inFile.c_str(); } + + static void SetOriginalTFile(bool originalTFile=true) { _originalTFile=originalTFile; } + static void SetFixPrexix(const char* in, const char* out) { + _prefixin = in; + _prefixout = out; + } + + static void PrintInfo(); + + private: + const char* GetCopyFile(const char *fname); + + static bool _originalTFile; + static std::string _prefixin; + static std::string _prefixout; + + std::string _inFile; + + ClassDef(TCopyFile,1) +} ; + +//bool TCopyFile::_originalTFile=true; +//std::string TCopyFile::_prefixin; +//std::string TCopyFile::_prefixout; + +#endif + diff --git a/Database/FileStager/FileStager/TStageFileInfo.h b/Database/FileStager/FileStager/TStageFileInfo.h new file mode 100644 index 0000000000000000000000000000000000000000..a0390616e05e0aae5271f53949d2aa8212778fb9 --- /dev/null +++ b/Database/FileStager/FileStager/TStageFileInfo.h @@ -0,0 +1,51 @@ +/* + Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration +*/ + + +#ifndef TSTAGEFILEINFO +#define TSTAGEFILEINFO + +#include "TROOT.h" +#include "TObject.h" + +#ifndef __CINT__ +#include <stdio.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> +#include <signal.h> +#include <sys/wait.h> +#endif +#include <string> +#include <map> + +using namespace std ; + +class TStageFileInfo : public TObject +{ + public: + enum Status { UNKNOWN, TOBESTAGED, STAGING, STAGED, + RELEASED, ERRORSTAGING, KILLEDSTAGING, + ERRORRELEASED, KILLERROR }; + + TStageFileInfo() : pid(-999) {}; + virtual ~TStageFileInfo() {}; + + int pid; + string inFile; + string outFile; + Status status; +#ifndef __CINT__ + struct stat64 statFile; +#endif + string stout; + string sterr; + + private: + + ClassDef(TStageFileInfo,1) +} ; + + +#endif diff --git a/Database/FileStager/FileStager/TStageManager.h b/Database/FileStager/FileStager/TStageManager.h new file mode 100644 index 0000000000000000000000000000000000000000..6134fdca28290c2f5f42a94609407b511e46ce8f --- /dev/null +++ b/Database/FileStager/FileStager/TStageManager.h @@ -0,0 +1,124 @@ +/* + Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration +*/ + + +#ifndef TSTAGEMANAGER +#define TSTAGEMANAGER + +#include <TROOT.h> +#include "FileStager/TStageFileInfo.h" +#include "FileStager/TStagerInfo.h" + +#include "TObject.h" + +#ifndef __CINT__ +#include <sys/wait.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/errno.h> +#include <unistd.h> +#include <stdlib.h> +#include <stdio.h> +#include <iostream> +#include <signal.h> +//#include <sys/signal.h> +#endif + +#include <set> +#include <list> +#include <string> +#include <vector> +#include <map> +#include <algorithm> + 
+using namespace std ; + +class TStagerInfo; + +typedef char* pchar; + +class TStageManager : public TObject +{ + public: + + // access to the single instance of this class (created statically on first request) + static TStageManager& instance(); + + void addToList(const char* filename); + void releaseFile(const char* fname); + void releaseAll(); + const char* getFile(const char* fname); + const string getTmpFilename(const char* filename); + TStageFileInfo::Status getStatusOf(const char* filename, bool update=true); + + void setBaseTmpdir(const char* baseTmpdir); + void setCpCommand(const char* cpcommand); + void addCpArg(const char* cparg); + void setPipeLength(const int& pipeLength); + void setInfilePrefix(const char* infilePrefix); + void setOutfilePrefix(const char* outfilePrefix); + void setLogfileDir(const char* logfileDir); + void addPrefixFix(const char* in, const char* out); + + void verbose(bool v=true) { _verbose=v; } + void verboseWait(bool v=true) { _verbwait=v; } + void tryInfRestage(bool t=true) { _tryInfRestage=t; } + void firstFileAlreadyStaged(bool f=true) { _firstFileANDAlreadyStaged=f; } + void keepLogfiles(bool k=true) { _keepLogfiles=k; } + void setPidInLognames(bool p=true) { _pidInLognames=p; } + void keepStagedfiles(bool k=true) { _keepStagedfiles=k; } + void checkForPreviousStage(bool c=true) { _checkForPreviousStage=c; } + void setInteractive(bool i=true) { _keepLogfiles=i; _keepStagedfiles=i; _checkForPreviousStage=i; } + void print(); + void printProblemFiles(); + + int getNstaging(); + int getNtobestaged(); + int getNstaged(); + int getNtotal(); + int getSizeStaged(); + int getNproblem(); + + private: + + // default constructor (called by instance()) + TStageManager(); + virtual ~TStageManager(); + + // Private copy constructor : not implemented! + TStageManager( TStageManager&); + // Private assignment : not implemented! 
+ TStageManager& operator= (TStageManager&); + + void stageNext(bool forceStage=false); + void updateStatus(); + void trim(string& input); + void removePrefixOf(string& filename); + void removeFile(string filename); + void fixRootInPrefix(string& tmpname); + + void submitStageMonitor(); + bool _submittedStageMonitor; + + bool fileExists(const char* fileName); + + list< string > _toBeStagedList; + map<string,TStageFileInfo> _stageMap; + + static TStagerInfo _stagerInfo; + + bool _verbose; + bool _verbwait; + bool _tryInfRestage; + bool _firstFileANDAlreadyStaged; + bool _keepLogfiles; + bool _pidInLognames; + bool _keepStagedfiles; + bool _checkForPreviousStage; + + ClassDef(TStageManager,1) +} ; + +#endif + diff --git a/Database/FileStager/FileStager/TStagerInfo.h b/Database/FileStager/FileStager/TStagerInfo.h new file mode 100644 index 0000000000000000000000000000000000000000..c618c460f587a49d8b5514399085fd6c1faf0807 --- /dev/null +++ b/Database/FileStager/FileStager/TStagerInfo.h @@ -0,0 +1,60 @@ +/* + Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration +*/ + +#ifndef TSTAGERINFO +#define TSTAGERINFO + +#include "TObject.h" +#include <TROOT.h> + +#ifndef __CINT__ +#include <sys/wait.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/errno.h> +#include <unistd.h> +#include <stdlib.h> +//#include <stdio.h> +#include <iostream> +#include <signal.h> +//#include <sys/signal.h> +#endif + +#include <set> +#include <string> +#include <vector> +#include <map> + +using namespace std ; + +typedef char* pchar; + +class TStagerInfo : public TObject +{ + public: + TStagerInfo(); + virtual ~TStagerInfo(); + + void setTmpdir(); + void setDefaultTmpdir(); + + int pipeLength; + int pid; + string infilePrefix; + string outfilePrefix; + string logfileDir; + string baseTmpdir; + string tmpdir; + string cpcommand; + string stagemonitorcmd; + vector<string> cparg; + map<string,string> inPrefixMap; + + private: + + ClassDef(TStagerInfo,1) +} ; + +#endif + diff --git a/Database/FileStager/README b/Database/FileStager/README new file mode 100644 index 0000000000000000000000000000000000000000..77e28c155c95d4e4cee69f6abb5fde3559e32d92 --- /dev/null +++ b/Database/FileStager/README @@ -0,0 +1,7 @@ +Latest update: 2009-10-26, by Max Baak (mbaak@cern.ch) + +Documentation: +-------------- +https://twiki.cern.ch/twiki/bin/view/AtlasProtected/FileStager +for the official FileStager twiki, including the latest tag to use. 
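+
+Minimal standalone sketch (illustration only, not a package default: the
+"gridcopy://" prefix, the "rfcp" copy command, the plugin-handler line, the
+tree name and the file path below are placeholders -- see the twiki above
+for the supported configuration):
+
+  #include "TROOT.h"
+  #include "TPluginManager.h"
+  #include "FileStager/TStageManager.h"
+  #include "FileStager/TCopyChain.h"
+  #include "FileStager/TCopyFile.h"
+
+  void stage_example()
+  {
+    TStageManager& man = TStageManager::instance();
+    man.setInfilePrefix("gridcopy://");   // prefix stripped from input names
+    man.setCpCommand("rfcp");             // copy command forked per file
+    man.setPipeLength(2);                 // number of files staged ahead
+
+    // assumed handler registration, so TFile::Open() on "gridcopy:" URLs
+    // constructs a TCopyFile, which blocks until the local copy is staged
+    gROOT->GetPluginManager()->AddHandler("TFile", "^gridcopy:", "TCopyFile",
+        "FileStager", "TCopyFile(const char*,Option_t*,const char*,Int_t)");
+
+    TCopyChain::SetOriginalTChain(false); // route chain bookkeeping via the stager
+    TCopyFile::SetOriginalTFile(false);
+
+    TCopyChain chain("CollectionTree");
+    chain.Add("gridcopy://some/storage/path/file.root"); // queued for staging
+    chain.GetEntry(0);                    // opens the staged local copy
+  }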
+ diff --git a/Database/FileStager/Root/LinkDef.h b/Database/FileStager/Root/LinkDef.h new file mode 100644 index 0000000000000000000000000000000000000000..c26f632f7b68c9175289015a09b4b9f4c3968923 --- /dev/null +++ b/Database/FileStager/Root/LinkDef.h @@ -0,0 +1,17 @@ +/* + Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration +*/ + +#ifndef __FILESTAGER__ +#define __FILESTAGER__ + +#ifdef __CINT__ +#pragma link C++ class TStagerInfo; +#pragma link C++ class TStageFileInfo-; +#pragma link C++ class TStageManager; +#pragma link C++ class TCopyChain; +#pragma link C++ class TCopyFile; +#endif + + +#endif diff --git a/Database/FileStager/Root/TCopyChain.cxx b/Database/FileStager/Root/TCopyChain.cxx new file mode 100644 index 0000000000000000000000000000000000000000..79f83611209636d7234eee5371da1af3b98ec03f --- /dev/null +++ b/Database/FileStager/Root/TCopyChain.cxx @@ -0,0 +1,116 @@ +/* + Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration +*/ + +#include "FileStager/TCopyChain.h" +#include "FileStager/TStageManager.h" +#include "TROOT.h" +#include "TError.h" +#include "TFile.h" +#include <iostream> + +ClassImp(TCopyChain) + +bool TCopyChain::_originalTChain=true; + +TCopyChain::TCopyChain() + : TChain () + , _releasedLastFile(false) + , _bigNumber(1234567890) // big number like GetEntriesFast() + , _curFile(0) + , _prevFileName("") + , _isNewFile(true) +{ +} + + +TCopyChain::TCopyChain (const char* name, + const char* title /*= ""*/) + : TChain (name, title) + , _releasedLastFile(false) + , _bigNumber(1234567890) // big number like GetEntriesFast() + , _curFile(0) + , _prevFileName("") + , _isNewFile(true) +{ +} + + +TCopyChain::~TCopyChain() +{ + //this->TChain::~TChain(); +} + + +Long64_t TCopyChain::GetReadEntry() const +{ + Long64_t entry = TChain::GetReadEntry(); + if (_originalTChain) { return entry; } + + // FIXME: Check --- i think i saw that the behavior of + // TChain::GetReadEntry changed in 5.16. 
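+  // (convert the entry number local to the current tree into the global
+  //  entry number of the chain, using the offset of the current tree)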
+ if (fTreeNumber != -1) entry += fTreeOffset[fTreeNumber]; + return entry; +} + + +Long64_t TCopyChain::GetEntries() const +{ + if (_originalTChain) return TChain::GetEntries(); + else return _bigNumber; +} + + +Long64_t TCopyChain::GetEntries(const char* sel) +{ + if (_originalTChain) return TChain::GetEntries(sel); + else return _bigNumber; +} + + +Long64_t TCopyChain::LoadTree(Long64_t entry) +{ + if (!_originalTChain && _curFile!=0) { + _prevFileName = _curFile->GetName(); + } + + Long64_t readEntry = this->TChain::LoadTree(entry); + if (_originalTChain) return readEntry; + + // check if file has changed + _curFile=TChain::GetCurrentFile(); + + if (_curFile!=0) { + if (_prevFileName!=_curFile->GetName()) { + _isNewFile=true; + std::cout << "TCopyChain::LoadTree() : Opened new file <" + << _curFile->GetName() + << ">" + << std::endl; + } + else{ _isNewFile=false; } + } + else{ _isNewFile=true; } + + return readEntry; +} + + +TString TCopyChain::GetCurrentLocalFileName() +{ + if(_curFile!=0){ + TStageManager& manager(TStageManager::instance()); + return TString(manager.getTmpFilename(_curFile->GetName())); + } + return TString(0); +} + +Int_t TCopyChain::Add(const char* name, Long64_t nentries) +{ + if (_originalTChain) return TChain::Add(name, nentries); + + TStageManager& manager(TStageManager::instance()); + manager.addToList(name); + return this->TChain::Add(name, nentries); +} + diff --git a/Database/FileStager/Root/TCopyFile.cxx b/Database/FileStager/Root/TCopyFile.cxx new file mode 100644 index 0000000000000000000000000000000000000000..ed0d10c8646a5c62f65ac90bf565141c33425d59 --- /dev/null +++ b/Database/FileStager/Root/TCopyFile.cxx @@ -0,0 +1,69 @@ +/* + Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration +*/ + +#include "FileStager/TCopyFile.h" +#include "FileStager/TStageManager.h" +//#include "TROOT.h" +//#include "TEnv.h" + +ClassImp(TCopyFile) + +bool TCopyFile::_originalTFile=true; +std::string TCopyFile::_prefixin; +std::string TCopyFile::_prefixout; + + +TCopyFile::TCopyFile() + : TFile() +{ +} + + +TCopyFile::TCopyFile(const char *fname, Option_t *option, const char *ftitle, Int_t compress) + : TFile(GetCopyFile(fname),option,ftitle,compress) + , _inFile(fname) +{ +} + + +TCopyFile::~TCopyFile() +{ + //this->TFile::~TFile(); + if (_originalTFile) return; + + TStageManager& manager(TStageManager::instance()); + manager.releaseFile(_inFile.c_str()); +} + + +const char* +TCopyFile::GetCopyFile(const char* fname) +{ + string filename(fname); + + //cout << "TCopyFile::GetCopyFile fname = " << (!filename.empty()?filename:"<null>") << endl ; + if (_originalTFile) return filename.c_str(); + + TStageManager& manager(TStageManager::instance()); + return manager.getFile(filename.c_str()); +} + + +void +TCopyFile::PrintInfo() +{ + /* xrootd */ + //gEnv->SetValue("XNet.ReadAheadSize",0); + //gEnv->SetValue("XNet.Debug",3); + //gDebug = 7; + + std::cout<<"===================================================================" << std::endl; + std::cout<<"|| TFile Information " << std::endl; + std::cout<<"|| Read Bytes (fgBytesRead): " << TFile::fgBytesRead << std::endl; + std::cout<<"|| Write Bytes (fgBytesWrite): " << TFile::fgBytesWrite << std::endl; + std::cout<<"|| File Counter (fgFileCounter): " << TFile::fgFileCounter << std::endl; + std::cout<<"|| Read Calls (fgReadCalls): " << TFile::fgReadCalls << std::endl; + std::cout<<"===================================================================" << std::endl; +} + diff --git 
a/Database/FileStager/Root/TStageFileInfo.cxx b/Database/FileStager/Root/TStageFileInfo.cxx new file mode 100644 index 0000000000000000000000000000000000000000..0010a5e8e760cbce51ea4ac55e3507b27e3eff94 --- /dev/null +++ b/Database/FileStager/Root/TStageFileInfo.cxx @@ -0,0 +1,13 @@ +/* + Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration +*/ + +#include "FileStager/TStageFileInfo.h" + +ClassImp(TStageFileInfo) + +void +TStageFileInfo::Streamer(TBuffer& /*R__b*/) +{ +} + diff --git a/Database/FileStager/Root/TStageManager.cxx b/Database/FileStager/Root/TStageManager.cxx new file mode 100644 index 0000000000000000000000000000000000000000..0c01f1d8c9bf9e4cf0e988be8dc36bd206ff71f3 --- /dev/null +++ b/Database/FileStager/Root/TStageManager.cxx @@ -0,0 +1,822 @@ +/* + Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration +*/ + +#include "FileStager/TStageManager.h" +#include "FileStager/TCopyFile.h" +#include <libgen.h> +#include <assert.h> + + +ClassImp(TStageManager) + + +TStagerInfo TStageManager::_stagerInfo; + + +TStageManager::TStageManager() + : _submittedStageMonitor(false) + , _verbose(false) + , _verbwait(false) + , _tryInfRestage(false) + , _firstFileANDAlreadyStaged(false) + , _keepLogfiles(false) + , _pidInLognames(true) + , _keepStagedfiles(false) + , _checkForPreviousStage(false) +{ + _stageMap.clear(); + _toBeStagedList.clear(); +} + + +TStageManager::~TStageManager() +{ + print(); + + // terminating session, so cleanup + //_keepStagedfiles = false; + releaseAll(); + + // remove temporary stage directory + if (_stagerInfo.tmpdir.compare(_stagerInfo.baseTmpdir)!=0) + rmdir(_stagerInfo.tmpdir.c_str()); + + // TCopyFile destructor can no longer call stagemanager after this has been destroyed. + TCopyFile::SetOriginalTFile(); +} + + +TStageManager& +TStageManager::instance() +{ + // created on first call, deleted after main() + static TStageManager _instance; + return _instance; +} + + +void +TStageManager::addToList(const char* filename) +{ + string input(filename); + trim(input); + + if (_verbose) { cout << "TStageManager::addToList() : <" << input << ">" << endl; } + + if (_firstFileANDAlreadyStaged) { + _stageMap[input] = TStageFileInfo(); + _stageMap[input].status = TStageFileInfo::STAGED; + string inFile(input); + removePrefixOf(inFile); + _stageMap[input].inFile = inFile; + _stageMap[input].outFile = getTmpFilename(input.c_str()); + _stageMap[input].stout = _stageMap[input].outFile + "_stage.out"; + _stageMap[input].sterr = _stageMap[input].outFile + "_stage.err"; + if (!_stagerInfo.logfileDir.empty()) { + _stageMap[input].stout = _stagerInfo.logfileDir + "/" + basename((char*)_stageMap[input].stout.c_str()); + _stageMap[input].sterr = _stagerInfo.logfileDir + "/" + basename((char*)_stageMap[input].sterr.c_str()); + } + _firstFileANDAlreadyStaged = false; // next file + } + + list<string>::iterator itrF = find(_toBeStagedList.begin(),_toBeStagedList.end(),input); + if (_stageMap.find(input)==_stageMap.end()) { + if (itrF==_toBeStagedList.end()) _toBeStagedList.push_back(input); + } else if ( _stageMap.find(input)->second.status != TStageFileInfo::STAGED && + _stageMap.find(input)->second.status != TStageFileInfo::STAGING ) { + // not interested. 
cleanup + releaseFile(input.c_str()) ; + _stageMap.erase(_stageMap.find(input)); + if (itrF==_toBeStagedList.end()) _toBeStagedList.push_back(input); + } + + stageNext(); +} + + +void +TStageManager::releaseFile(const char* fname) +{ + string tmpname(fname); + trim(tmpname); + fixRootInPrefix(tmpname); + const char* filename = tmpname.c_str(); + + // first update status + updateStatus(); + + list<string>::iterator itrF = find(_toBeStagedList.begin(),_toBeStagedList.end(),filename); + if(itrF!=_toBeStagedList.end()) { + _toBeStagedList.erase(itrF); + return; + } + + if(_stageMap.find(filename)==_stageMap.end()) return; + + if (_stageMap[filename].status==TStageFileInfo::RELEASED || + _stageMap[filename].status==TStageFileInfo::KILLEDSTAGING || + _stageMap[filename].status==TStageFileInfo::ERRORRELEASED || + _stageMap[filename].status==TStageFileInfo::KILLERROR) { + return; + } + + if (_verbose) cout << "TStageManager::releaseFile() : " << filename << endl; + + if (_stageMap[filename].status==TStageFileInfo::STAGING) { + // kill process first + int killReturn = kill(_stageMap[filename].pid, SIGKILL); + + if( killReturn == ESRCH) { + // pid does not exist + if (_verbose) cout << "Group does not exist!" << endl; + // assume file is done staging + // fill statistics first + stat64(_stageMap[filename].outFile.c_str(),&(_stageMap[filename].statFile)); + _stageMap[filename].status=TStageFileInfo::STAGED; + } else if( killReturn == EPERM) { + // No permission to send kill signal + // This should never happen + if (_verbose) cout << "No permission to send kill signal!" << endl; + _stageMap[filename].status=TStageFileInfo::KILLERROR; + } else { + if (_verbose) cout << "Kill signal sent. All Ok!" << endl; + _stageMap[filename].status=TStageFileInfo::KILLEDSTAGING; + } + } + + if (_stageMap[filename].status==TStageFileInfo::ERRORSTAGING) { + _stageMap[filename].status=TStageFileInfo::ERRORRELEASED; + } + + if (_stageMap[filename].status==TStageFileInfo::STAGED) { + if (!_keepStagedfiles) { + removeFile(_stageMap[filename].outFile); + _stageMap[filename].status=TStageFileInfo::RELEASED; + } + } + + if (!_keepLogfiles) { + removeFile(_stageMap[filename].stout); + removeFile(_stageMap[filename].sterr); + } +} + + +void TStageManager::releaseAll() +{ + _toBeStagedList.clear(); + + map<string,TStageFileInfo>::iterator itr = _stageMap.begin(); + for (; itr!=_stageMap.end(); itr=_stageMap.begin()) { + string first = (itr->first); + releaseFile(first.c_str()); + _stageMap.erase(itr); + } +} + + +const char* +TStageManager::getFile(const char* fname) +{ + string tmpname(fname); + trim(tmpname); + fixRootInPrefix(tmpname); + const char* filename = tmpname.c_str(); + + string name(filename); + trim(name); + removePrefixOf(name); + + if (_verbose) cout << "TStageManager::getFile() : " << filename << endl; + + // first update status + updateStatus(); + + // file not found -> better start staging immediately. + list<string>::iterator itrF = find(_toBeStagedList.begin(),_toBeStagedList.end(),filename); + if (_stageMap.find(filename)==_stageMap.end() && + (itrF==_toBeStagedList.end()) ) { + if (_verbose) cout << "TStageManager::getFile() : " << filename << " unknown. Start immediate staging." 
<< endl; + _toBeStagedList.push_front(filename); + } + + // prioritize file needs to be staged + itrF = find(_toBeStagedList.begin(),_toBeStagedList.end(),filename); + if (itrF!=_toBeStagedList.end() && _stageMap.find(filename)==_stageMap.end()) { + // move file to front + _toBeStagedList.erase(itrF); + _toBeStagedList.push_front(filename); + + if (_verbose) cout << "TStageManager::getFile() : " << filename << ". Forced staging." << endl; + stageNext(true); // forced stage right away + } + + if (_stageMap.find(filename)!=_stageMap.end()) { + if (_verbose) cout << "TStageManager::getFile() : Checking staging status of " << filename << endl; + + // file still staging + if (_stageMap[filename].status==TStageFileInfo::STAGING) { + + // check status + pid_t pID = _stageMap[filename].pid; + int childExitStatus; + + // wait till staging is done + if (_verbose || _verbwait) { cout << "TStageManager::getFile() : Waiting till <" + << filename + << "> is staged." + << endl; } + + waitpid( pID, &childExitStatus, 0); + + if( !WIFEXITED(childExitStatus) ) { + if (_verbose) { cout << "waitpid() exited with status= " + << WEXITSTATUS(childExitStatus) + << endl; } + //_stageMap[filename].status = TStageFileInfo::ERRORSTAGING; + } else if( WIFSIGNALED(childExitStatus) ) { + if (_verbose) { cout << "waitpid() exited with signal: " + << WTERMSIG(childExitStatus) + << endl; } + //_stageMap[filename].status = TStageFileInfo::ERRORSTAGING; + } else { + // child exited okay + if (_verbose) { cout << "waitpid() okay." << endl; } + //_stageMap[filename].status = TStageFileInfo::STAGED; + } + } + + // check if file _really_ exists. Sometimes staging returns empty files. + bool fexists = fileExists(_stageMap[filename].outFile.c_str()); + if (fexists) _stageMap[filename].status = TStageFileInfo::STAGED; + else { + if (_verbose) cout << "TStageManager::getFile() : ERROR : staging of file <" << filename << "> failed. Note: This could be an empty file or time-out." << endl; + } + + // file is staged + if (_stageMap[filename].status == TStageFileInfo::STAGED) { + if (_verbose) cout << "TStageManager::getFile() : <" << filename << "> finished staging" << endl; + + stat64(_stageMap[filename].outFile.c_str(),&(_stageMap[filename].statFile)); + // start next stage + stageNext(); + return _stageMap[filename].outFile.c_str(); + + // file has already been run over ... + } else if (_stageMap[filename].status == TStageFileInfo::RELEASED) { + if (_verbose) cout << "TStageManager::getFile() : WARNING : <" << filename << "> already run over!" << endl; + /// Cleanup and let's try again. + if (_verbose) cout << "TStageManager::getFile() : restaging <" << filename << "> immediately." << endl; + releaseFile(fname); + _stageMap.erase(_stageMap.find(filename)); + return getFile(fname); + + // that's funny ... staged but no file! + } else { + + if (_tryInfRestage) { // false by default -- restaging done by wrapper script + //Something went wrong. Cleanup and let's try again. + if (_verbose) cout << "TStageManager::getFile() : ERROR : restaging <" << filename << "> immediately." << endl; + releaseFile(fname); + _stageMap.erase(_stageMap.find(filename)); + return getFile(fname); + } + + cout << "TStageManager::getFile() : ERROR : staging of <" << filename << "> failed (after N retries). Giving up." << endl; + } + } + + // This should not be reached + if (_verbose) cout << "TStageManager::getFile() : ERROR <" << filename << "> not recognized." 
<< endl; + + stageNext(); + + return name.c_str(); // Let TFile handle error +} + + +const string +TStageManager::getTmpFilename(const char* filename) +{ + if (_stageMap.find(filename)!=_stageMap.end()) + if (!_stageMap[filename].outFile.empty()) + return _stageMap[filename].outFile.c_str(); + + string infile(filename); trim(infile); + string tmpfile(filename); trim(tmpfile); + //removePrefixOf(infile); + string::size_type pos = tmpfile.find_last_of("/"); + tmpfile = _stagerInfo.tmpdir + "/tcf_" + /*string(Form("tcf%d_",getpid())) +*/ tmpfile.substr(pos+1,tmpfile.size()-pos-1); + + // double check!! + string::size_type posi = infile.find_last_of("/"); + string first = infile.substr(posi+1,infile.size()-posi-1); + posi = tmpfile.find(first); + tmpfile.erase( posi + first.size() ); + + if (_verbose) cout << "TStageManager::getTmpFilename() : <" << tmpfile << "> <" + << tmpfile.c_str() << ">" + << endl; + + return tmpfile; +} + + +TStageFileInfo::Status +TStageManager::getStatusOf(const char* filename, bool update) +{ + if (update) updateStatus(); + + list<string>::iterator itrF = find(_toBeStagedList.begin(),_toBeStagedList.end(),filename); + if(itrF!=_toBeStagedList.end()) { + return TStageFileInfo::TOBESTAGED; + } + + if(_stageMap.find(filename)!=_stageMap.end()) + return _stageMap[filename].status; + + return TStageFileInfo::UNKNOWN; +} + + +void +TStageManager::print() +{ + updateStatus(); + TStageFileInfo::Status status; + string filename; + Long64_t sumsize(0); + Long64_t sumfiles(0); + + int nstaging = getNstaging(); + int ntobestaged = getNtobestaged(); + int nproblem = getNproblem(); + int ntotal = getNtotal(); + + list<string>::iterator itrb = _toBeStagedList.begin(); + for (; itrb!=_toBeStagedList.end(); ++itrb) { + filename = *itrb; + status = getStatusOf(filename.c_str(),false); + if (_verbose) cout << "Status <" << filename << "> : " << status << endl; + } + + map<string,TStageFileInfo>::iterator itr = _stageMap.begin(); + for (; itr!=_stageMap.end(); ++itr) { + filename = (itr->first); + status = getStatusOf(filename.c_str(),false); + if (_verbose) cout << "Status <" << filename << "> : " << status << endl; + if (status == TStageFileInfo::RELEASED || status == TStageFileInfo::STAGED) { + if (_verbose) { cout << (itr->second).statFile.st_atime << " " + << (itr->second).statFile.st_mtime << " " + << (itr->second).statFile.st_ctime << " " + << (itr->second).statFile.st_size << " " + << endl; } + sumsize += (itr->second).statFile.st_size ; + sumfiles++; + } + } + + cout << "TStageManager::print() : " + << "staged: " << sumfiles << " (" << sumsize/(1024*1024) << " mb)" + << " , staging: " << nstaging + << " , to be staged: " << ntobestaged + << " , problems: " << nproblem + << " , total: " << ntotal << " files." 
+ << endl; +} + + +void TStageManager::printProblemFiles() +{ + updateStatus(); + TStageFileInfo::Status status; + string filename; + + map<string,TStageFileInfo>::iterator itr = _stageMap.begin(); + for (; itr!=_stageMap.end(); ++itr) { + filename = (itr->first); + status = getStatusOf(filename.c_str(),false); + + if (status != TStageFileInfo::RELEASED && status != TStageFileInfo::STAGED && status != TStageFileInfo::STAGING) { + cout << filename << endl; + } + } +} + + +int TStageManager::getNtobestaged() +{ + int nentries( static_cast<int>(_toBeStagedList.size()) ); + if (_verbose) { cout << "TStageManager::getNtobestaged() = " << nentries << endl; } + return nentries; +} + + +int TStageManager::getNstaging() +{ + int nentries(0); + map<string,TStageFileInfo>::iterator itr = _stageMap.begin(); + for (; itr!=_stageMap.end(); ++itr) { + if ((itr->second).status == TStageFileInfo::STAGING) nentries++; + } + if (_verbose) { cout << "TStageManager::getNstaging() = " << nentries << endl; } + return nentries; +} + + +int TStageManager::getNstaged() +{ + int nentries(0); + TStageFileInfo::Status status; + + map<string,TStageFileInfo>::iterator itr = _stageMap.begin(); + for (; itr!=_stageMap.end(); ++itr) { + status = (itr->second).status; + if ( status == TStageFileInfo::RELEASED || status == TStageFileInfo::STAGED ) nentries++; + } + + if (_verbose) { cout << "TStageManager::getNstaged() = " << nentries << endl; } + return nentries; +} + + +int TStageManager::getSizeStaged() +{ + TStageFileInfo::Status status; + string filename; + Long64_t sumsize(0); + + map<string,TStageFileInfo>::iterator itr = _stageMap.begin(); + for (; itr!=_stageMap.end(); ++itr) { + filename = (itr->first); + status = getStatusOf(filename.c_str(),false); + if (status == TStageFileInfo::RELEASED || status == TStageFileInfo::STAGED) { + sumsize += (itr->second).statFile.st_size ; + } + } + int mbsize = static_cast<int>( sumsize/(1024*1024) ); + return mbsize; +} + + +int TStageManager::getNtotal() +{ + return ( this->getNtobestaged() + static_cast<int>( _stageMap.size() ) ) ; +} + + +int TStageManager::getNproblem() +{ + return ( getNtotal() - getNtobestaged() - getNstaging() - getNstaged() ) ; +} + +void +TStageManager::stageNext(bool forceStage) +{ + if (_verbose) { cout << "TStageManager::stageNext()" << endl; } + + // update status + updateStatus(); + + // copy command given? + assert(!_stagerInfo.cpcommand.empty()); + + if (!_submittedStageMonitor) { + submitStageMonitor(); + _submittedStageMonitor=true; + } + + if (( (getNstaging()<_stagerInfo.pipeLength) || forceStage) && (!_toBeStagedList.empty()) ) { + + string cf = *(_toBeStagedList.begin()); + _toBeStagedList.erase(_toBeStagedList.begin()); + + _stageMap[cf] = TStageFileInfo(); + _stageMap[cf].status = TStageFileInfo::STAGING; + string inFile(cf); trim(inFile); + removePrefixOf(inFile); + _stageMap[cf].inFile = inFile; + _stageMap[cf].outFile = getTmpFilename(cf.c_str()); + + bool needToStageFile(true); + + // check if file already happens to exist from previous stage. If so, no need to stage. + if (_checkForPreviousStage) { + if (_verbose) cout << "TStageManager::stageNext() : checking for previous stage of <" << cf << ">." << endl; + if ( fileExists(_stageMap[cf].outFile.c_str()) ) { + if (_verbose) cout << "TStageManager::stageNext() : found previous stage of <" << cf << ">. Skip staging." 
<< endl; + _stageMap[cf].status = TStageFileInfo::STAGED; + stat64(_stageMap[cf].outFile.c_str(),&(_stageMap[cf].statFile)); + needToStageFile = false ; + } + } + + // needToStageFile + if (needToStageFile) { + // useful output + if (forceStage) + if (_verbose) cout << "TStageManager::stageNext() : forcing stage of <" << cf << ">." << endl; + if (_verbose || _verbwait) { cout << "TStageManager::stageNext() : Now staging <" << cf << ">." << endl; } + if (_verbose) { cout << "TStageManager::stageNext() : outFile = <" << _stageMap[cf].outFile << ">." << endl; } + if (_verbose) cout << "about to fork" << endl ; + + // do fork + if( (_stageMap[cf].pid=fork()) == 0 ) { + // Code only executed by child process + + if (_verbose) cout << "this is child process " << getpid() << " " + << _stageMap[cf].pid + << endl ; + + if (_verbose) { cout << "child process : outFile = <" << _stageMap[cf].outFile << ">." << endl; } + if (_verbose) { cout << "child process : tmpFile = <" << getTmpFilename(cf.c_str()) << ">." << endl; } + + _stageMap[cf].stout = _stageMap[cf].outFile + ( _pidInLognames ? Form("_stage%d.out",getpid()) : "_stage.out" ); + _stageMap[cf].sterr = _stageMap[cf].outFile + ( _pidInLognames ? Form("_stage%d.err",getpid()) : "_stage.err" ); + + if (!_stagerInfo.logfileDir.empty()) { + _stageMap[cf].stout = _stagerInfo.logfileDir + "/" + basename((char*)_stageMap[cf].stout.c_str()); + _stageMap[cf].sterr = _stagerInfo.logfileDir + "/" + basename((char*)_stageMap[cf].sterr.c_str()); + } + + if (!_verbose) freopen(_stageMap[cf].stout.c_str(),"w",stdout); + if (!_verbose) freopen(_stageMap[cf].sterr.c_str(),"w",stderr); + + int nargs = 4 + int(_stagerInfo.cparg.size()); + const int argsc = 14; //(nargs); //= 4 + int(_stagerInfo.cparg.size()); + pchar args[argsc]; // = {"-r", "-t", "-l", (char *) 0 }; + + for (int i=0; i<nargs-1; ++i) { + args[i] = new char[1024]; + //for (int j=0; j<1024; ++j) { args[i][j] = (char)0; } + } + + strcpy(args[0],_stagerInfo.cpcommand.c_str()); + for (int i=1; i<=int(_stagerInfo.cparg.size()); ++i) + strcpy(args[i],(_stagerInfo.cparg[i-1]).c_str()); + + strcpy(args[nargs-3],_stageMap[cf].inFile.c_str()); + + string outTmpfile = _stagerInfo.outfilePrefix + _stageMap[cf].outFile; + strcpy(args[nargs-2],outTmpfile.c_str()); + strcpy(args[nargs-2],outTmpfile.c_str()); + + if (_verbose) cout << "child processs <" << outTmpfile.c_str() << "> <" << args[nargs-2] << ">" << endl; + + args[nargs-1] = (char *) 0; + + if (_verbose) { + cout << "child processs is executing execv " + << _stagerInfo.cpcommand.c_str() + << " with args " ; + for (int i=0; i<nargs-1; ++i) { if (args[i]) { cout << args[i] << " "; } } + cout << endl ; + } + + execvp(_stagerInfo.cpcommand.c_str(), args); + + //_stageMap[cf].status = TStageFileInfo::STAGING; + + for (int i=0; i<nargs-1; ++i) { if (args[i]) delete[] args[i]; } + } else { + // Code only executed by parent process + if (_verbose) { + cout << "this is parent process, pid of child = " + << _stageMap[cf].pid + << endl ; + } + _stageMap[cf].stout = _stageMap[cf].outFile + Form("_stage%d.out",_stageMap[cf].pid); + _stageMap[cf].sterr = _stageMap[cf].outFile + Form("_stage%d.err",_stageMap[cf].pid); + + if (!_stagerInfo.logfileDir.empty()) { + _stageMap[cf].stout = _stagerInfo.logfileDir + "/" + basename((char*)_stageMap[cf].stout.c_str()); + _stageMap[cf].sterr = _stagerInfo.logfileDir + "/" + basename((char*)_stageMap[cf].sterr.c_str()); + } + } // end of fork + } // need to stage file + } // end of stager loop + + // reupdate status + 
//updateStatus(); +} + + +void +TStageManager::updateStatus() +{ + if (_verbose) { cout << "TStageManager::updateStatus()" << endl; } + + map<string,TStageFileInfo>::iterator itr = _stageMap.begin(); + for (; itr!=_stageMap.end(); ++itr) { + if ((itr->second).status == TStageFileInfo::STAGING) { + + pid_t pID = (itr->second).pid; + + int childExitStatus; + waitpid( pID, &childExitStatus, WNOHANG); + + if( WIFEXITED(childExitStatus) ) { + // done staging + //(itr->second).status = TStageFileInfo::STAGED; + if (_verbose) { cout << "waitpid() still staging. Returned with: Status= " + << WEXITSTATUS(childExitStatus) + << " and Exit= " + << WIFEXITED(childExitStatus) + << endl; } + } else if( WIFSIGNALED(childExitStatus) ) { + if (_verbose) { cout << "waitpid() still staging. Returned with: Signal= " + << WTERMSIG(childExitStatus) + << " and Exit= " + << WIFSIGNALED(childExitStatus) + << endl; } + //(itr->second).status = TStageFileInfo::ERRORSTAGING; + } + // else file still staging + } + } +} + + +void +TStageManager::submitStageMonitor() +{ + if (_verbose) { cout << "TStageManager::submitStageMonitor()" << endl; } + if (_stagerInfo.stagemonitorcmd.empty()) return; + + std::string whichstr = Form("which %s 2>/dev/null 1>/dev/null",_stagerInfo.stagemonitorcmd.c_str()); + int whichint = system( whichstr.c_str() ); // check for stagemonitor + if (whichint!=0) { + cout << "TStageManager::submitStageMonitor() : ERROR : <" << _stagerInfo.stagemonitorcmd + << "> not found in $PATH. Exit." + << endl; + exit(1); + } + + // pass parent pid to stagemonitor + int ppid = getpid(); + + int cpid(0); + if( (cpid=fork()) == 0 ) { + // Code only executed by child process + + if (_verbose) cout << "this is child process " << getpid() << endl ; + + pid_t pgid = setsid(); //setpgid(getpid(), getpid()); + if( pgid < 0) { + cout << "Failed to set process group ID" << endl; + _exit(0); // If exec fails then exit forked process. 
+ } + + const int nargs = 7; + pchar args[nargs]; + for (int i=0; i<nargs-1; ++i) { args[i] = new char[1024]; } + + strcpy(args[0],_stagerInfo.stagemonitorcmd.c_str()); + sprintf(args[1],"%d",ppid); + strcpy(args[2],_stagerInfo.tmpdir.c_str()); + strcpy(args[3],_stagerInfo.baseTmpdir.c_str()); + if (_keepLogfiles) strcpy(args[4],"1"); + else strcpy(args[4],"0"); + if (_keepStagedfiles) strcpy(args[5],"1"); + else strcpy(args[5],"0"); + args[6]=(char *) 0; + + if (_verbose) { + cout << "child processs is executing execv " + << _stagerInfo.stagemonitorcmd + << " with args " ; + for (int i=0; i<nargs-1; ++i) { if (args[i]) { cout << args[i] << " "; } } + cout << endl ; + } + + execvp(_stagerInfo.stagemonitorcmd.c_str(), args); + for (int i=0; i<nargs-1; ++i) { if (args[i]) delete[] args[i]; } + + } else { + // Code only executed by parent process + if (_verbose) + cout << "this is parent process, pid of child = " << cpid << endl ; + } + +} + + +void +TStageManager::trim(string& input) +{ + // trim leading and trailing whitespace + string::size_type position = input.find_first_not_of(" \t\n"); + if ( position == std::string::npos ) return; // skip, it's all whitespace + input.erase(0, position); + position= input.find_last_not_of(" \t\n"); + if ( position != std::string::npos) + input.erase( position+1 ); +} + + +void +TStageManager::removePrefixOf(string& filename) +{ + string::size_type ipsize(_stagerInfo.infilePrefix.size()); + string::size_type opsize(_stagerInfo.outfilePrefix.size()); + + if (strncmp(filename.c_str(),_stagerInfo.infilePrefix.c_str(),ipsize)==0) + filename = filename.substr(ipsize,filename.size()-ipsize); + else if (strncmp(filename.c_str(),_stagerInfo.outfilePrefix.c_str(),opsize)==0) + filename = filename.substr(opsize,filename.size()-opsize); +} + + +void +TStageManager::removeFile(string filename) +{ + if (filename.empty()) return; + + if (_verbose) cout << "TStageManager::removeFile() : " << filename << endl; + + if (remove(filename.c_str())==-1) { + if (_verbose) + cerr << "TStageManager::removeFile() Could not remove file: <" + << filename + << ">." 
+ << endl; + } +} + + +void TStageManager::setBaseTmpdir(const char* baseTmpdir) { + string basedir = baseTmpdir; + + string::size_type position; + while( (position=basedir.find_last_of("/")) == basedir.size() ) + basedir.replace(position,1,""); // remove any last slashes + + _stagerInfo.baseTmpdir = basedir; + _stagerInfo.setTmpdir(); +} + + +void TStageManager::setCpCommand(const char* cpcommand) { + _stagerInfo.cpcommand = cpcommand; + _stagerInfo.cparg.clear(); +} + + +void TStageManager::addCpArg(const char* cpargm) { + _stagerInfo.cparg.push_back(cpargm); +} + +void TStageManager::setPipeLength(const int& pipeLength) { + _stagerInfo.pipeLength = pipeLength; +} + +void TStageManager::setInfilePrefix(const char* infilePrefix) { + _stagerInfo.infilePrefix = infilePrefix; +} + +void TStageManager::setOutfilePrefix(const char* outfilePrefix) { + _stagerInfo.outfilePrefix = outfilePrefix; +} + +void TStageManager::setLogfileDir(const char* logfileDir) { + _stagerInfo.logfileDir = logfileDir; +} + +void TStageManager::addPrefixFix(const char* in, const char* out) { + if (_stagerInfo.inPrefixMap.find(in)==_stagerInfo.inPrefixMap.end()) + _stagerInfo.inPrefixMap[in] = out; +} + +void TStageManager::fixRootInPrefix(string& tmpname) +{ + map<string,string>::iterator itr = _stagerInfo.inPrefixMap.begin(); + for (; itr!=_stagerInfo.inPrefixMap.end(); ++itr) { + if(tmpname.find(itr->first)!=tmpname.npos) { + tmpname.replace(tmpname.find(itr->first),(itr->first).size(),itr->second); + break; + } + } +} + +bool TStageManager::fileExists(const char* fileName) +{ + struct stat64 info; + int ret = -1; + + //get the file attributes + ret = stat64(fileName, &info); + if(ret == 0) { + //stat() is able to get the file attributes, + //so the file obviously exists + // if filesize==0 assume the copying failed. + if (info.st_size == 0) return false; + else return true; + } else { + //stat() is not able to get the file attributes, + //so the file obviously does not exist or + //more capabilities is required + return false; + } +} + diff --git a/Database/FileStager/Root/TStagerInfo.cxx b/Database/FileStager/Root/TStagerInfo.cxx new file mode 100644 index 0000000000000000000000000000000000000000..4e1cf7283836123093f8f4c251ae7932ee0bd534 --- /dev/null +++ b/Database/FileStager/Root/TStagerInfo.cxx @@ -0,0 +1,87 @@ +/* + Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration +*/ + +#include "FileStager/TStagerInfo.h" +#include <stdlib.h> + +ClassImp(TStagerInfo) + + +TStagerInfo::TStagerInfo() + : pipeLength(1) + , pid(getpid()) + , baseTmpdir("/tmp") + , tmpdir("/tmp") + , cpcommand("cp") + , stagemonitorcmd("StageMonitor.exe") +{ + setDefaultTmpdir(); + + inPrefixMap["lfn/"] = "lfn:/"; + inPrefixMap["srm//"] = "srm://"; + inPrefixMap["dcap//"] = "dcap://"; + inPrefixMap["sfn/"] = "sfn:/"; + inPrefixMap["gsiftp/"] = "gsiftp:/"; + inPrefixMap["rfio/"] = "rfio:/"; + inPrefixMap["http/"] = "http:/"; + inPrefixMap["file/"] = "file:/"; + inPrefixMap["ftp/"] = "ftp:/"; + //inPrefixMap["root//"] = "root://"; + inPrefixMap["root//castoratlas"] = "root://castoratlas/"; + +} + + +TStagerInfo::~TStagerInfo() {} + + +void TStagerInfo::setTmpdir() +{ + // MB: make baseTmpdir the default so actual tmpdir is easily known from outside ... + tmpdir = baseTmpdir; + + char pidchar[25]; + sprintf(pidchar,"%d",pid); + string testtmpdir = baseTmpdir+"/"+getenv("USER")+"_pID"+pidchar; + int errnum = mkdir(testtmpdir.c_str(),0700); + + // test the tmpdir ... 
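+  // NB: mkdir() itself returns 0 on success and -1 on failure (with the
+  //     reason left in errno), so only errnum==0 marks the directory as usable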
+ if ( !(errnum==0 || errnum==2) ) { + // no permissions to write in base directory, switching to /tmp + cerr << "TStagerInfo::setTmpdir() : No permission to write in temporary directory <" + << baseTmpdir + << ">. Switching back to default <$TMPDIR>, or else </tmp>." + << endl; + setDefaultTmpdir(); + } else { + // writing works, leave nothing behind + rmdir(testtmpdir.c_str()); + } +} + + +void TStagerInfo::setDefaultTmpdir() +{ + const char* tmp1 = getenv("TMPDIR"); + const char* tmp2 = getenv("WORKDIR"); + const char* tmp3 = getenv("OSG_WN_TMP"); + const char* tmp4 = getenv("EDG_WL_SCRATCH"); + + if (tmp4!=0) { + baseTmpdir = tmp4; + tmpdir = tmp4; + } else if (tmp3!=0) { + baseTmpdir = tmp3; + tmpdir = tmp3; + } else if (tmp2!=0) { + baseTmpdir = tmp2; + tmpdir = tmp2; + } else if (tmp1!=0) { + baseTmpdir = tmp1; + tmpdir = tmp1; + } else { + baseTmpdir = "/tmp"; + tmpdir = "/tmp"; + } +} diff --git a/Database/FileStager/bin/StageMonitor.cxx b/Database/FileStager/bin/StageMonitor.cxx new file mode 100644 index 0000000000000000000000000000000000000000..fe38899686ef92086bbd194744348f9bc1b3af64 --- /dev/null +++ b/Database/FileStager/bin/StageMonitor.cxx @@ -0,0 +1,118 @@ +/* + Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration +*/ + +#include <csignal> +#include <sys/wait.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/errno.h> +#include <unistd.h> +#include <stdlib.h> +#include <stdio.h> +#include <iostream> +#include <signal.h> +#include <dirent.h> +//#include <sys/signal.h> + +#include <string> +#include <vector> + +using namespace std ; + +// global dirs +string tmpdir; +string baseTmpdir; +bool keepLogfiles; +bool keepStagedfiles; +bool verbose(false); + +void term(int sig) +{ + //..necessary cleanup operations before terminating + if (verbose) std::cout << "StageMonitor : now handling signal : " << sig << std::endl; + + // get dir contents ... 
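+  // list everything the stager left behind in tmpdir: staged copies and their
+  // stage-out/err log files all carry the "tcf_" filename prefix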
+ vector<string> files; + DIR *dp; + struct dirent *dirp; + if((dp = opendir(tmpdir.c_str())) == NULL) { + //cout << "Error(" << errno << ") opening " << tmpdir << endl; + } else { + while ((dirp = readdir(dp)) != NULL) { + string ifile = (dirp->d_name) ; + if (ifile.find("tcf_")!=ifile.npos) files.push_back(tmpdir+"/"+ifile); + } + closedir(dp); + } + + bool warningPrinted(false); + + // delete stuff + for (unsigned int i=0; i<files.size(); i++) { + //cout << files[i] << endl; + if (keepLogfiles || keepStagedfiles) { + if ((files[i].rfind(".out")==files[i].size()-4) || + (files[i].rfind(".err")==files[i].size()-4)) { // log files + if (keepLogfiles) { + if (!warningPrinted) { + std::cout << "StageMonitor() : WARNING : Make sure to manually remove : " << std::endl; + warningPrinted = true; + } + std::cout << "rm -f " << files[i] << std::endl; + continue; + } + } else if (keepStagedfiles) { + if (!warningPrinted) { + std::cout << "StageMonitor() : WARNING : Make sure to manually remove : " << std::endl; + warningPrinted = true; + } + std::cout << "rm -f " << files[i] << std::endl; + continue; // remainder are staged files + } + } else remove(files[i].c_str()); + } + if (tmpdir.compare(baseTmpdir)!=0) + rmdir(tmpdir.c_str()); +} + + +//----------------------------------- +int main(int argc, const char** argv) +//----------------------------------- +{ + if ( argc<4 ) { + std::cout << "usage: " << argv[0] << " <pid> <tmpdir> <tmpdirbase> [<keepLogFiles>] [<keepStagedFiles>]" << std::endl ; + return 1 ; + } + + signal(SIGTERM, term); // register a SIGTERM handler + + bool doCleanup(false); + pid_t pID = atoi(argv[1]); + tmpdir = argv[2]; + baseTmpdir = argv[3]; + if (argc>=5) keepLogfiles = (bool)atoi(argv[4]); + else keepLogfiles = false; + if (argc>=6) keepStagedfiles = (bool)atoi(argv[5]); + else keepStagedfiles = false; + + //cout << argv[0] << " : Monitoring process with id = " << pID << endl; + int killReturn; + + while (1) { + killReturn = kill(pID,0); + //cout << argv[0] << " : " << killReturn << endl; + if( killReturn == -1 ) { + // pid does not exist + doCleanup=true; + break; + } + sleep(10); + } + + if (doCleanup) raise(SIGTERM); // will cause term() to run + + exit(0); +} + diff --git a/Database/FileStager/build/NEWS b/Database/FileStager/build/NEWS new file mode 100644 index 0000000000000000000000000000000000000000..a1a9d269f1e5c7384553d0b52557eabe3de2e4bf --- /dev/null +++ b/Database/FileStager/build/NEWS @@ -0,0 +1 @@ +11.11.2008 - Completed AMA standalone build chain diff --git a/Database/FileStager/build/README b/Database/FileStager/build/README new file mode 100644 index 0000000000000000000000000000000000000000..b7d52ada669cbf07647ed90f802682a14204c999 --- /dev/null +++ b/Database/FileStager/build/README @@ -0,0 +1,31 @@ +Scripts +======= + +* bootstrap.sh Set up build directory (run autoreconf etc.) +* cleanup.sh Clean up build directory (delete files created during + build etc.) +* generate_filelists.sh Prints listing of source files for usage in Makefile.am +* make_package.sh Make tarball +* standalone_build.sh Start complete standalone build (see below) + +Files +===== + +* configure.ac autoconf input file +* Makefile.am automake input file +* package-config.in Template for package-config script + +Standalone build +================ + +Run ./standalone_build.sh [--prefix INSTALLDIR] + +Files will be installed to INSTALLDIR. The default is /usr. + +Make sure that the directories where config scripts of required +packages can be found have been added to the PATH variable. 
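+For example, with ROOT set up through $ROOTSYS, this amounts to:
+  export PATH=$ROOTSYS/bin:$PATH
+so that root-config is picked up by bootstrap.sh and configure.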
+ +After the successful installation, you will have to set + export PATH=INSTALLDIR/bin:$PATH + export LD_LIBARARY_PATH=INSTALLDIR/lib:$LD_LIBRARY_PATH +to be able to use the package (e.g. add it to ~/.bashrc). diff --git a/Database/FileStager/build/bootstrap.sh b/Database/FileStager/build/bootstrap.sh new file mode 100755 index 0000000000000000000000000000000000000000..ca99ce3686564a6c68562e3da85a4a8fba993321 --- /dev/null +++ b/Database/FileStager/build/bootstrap.sh @@ -0,0 +1,155 @@ +#!/bin/sh + +############################################################ +# Set list of source directories to which symlinks should +# be created to +############################################################ +SRCDIRS="FileStager Root bin" + +echo "Bootstrapping..." + +############################################################ +# Check requirements +############################################################ +if [ -z `which root-config` ]; then + echo "Could not find root-config in PATH" + exit 1 +fi + +############################################################ +# Setup sed +############################################################ +if [ x`uname -s` = xDarwin ]; then + SED_VERSION=`sed --version 2>&1 | head -n1` + if echo $SED_VERSION | grep -q illegal; then + echo -n "Default sed is too old, looking for gsed..." + if [ x`which gsed` = x ]; then + echo "not found" + echo "Please install MacPorts and run 'sudo port install gsed'" + exit 1 + else + echo "found" + SED_PROGRAM=gsed + fi + else + SED_PROGRAM=sed + fi +else + SED_PROGRAM=sed +fi + +if [ x`which $SED_PROGRAM` = x ]; then + echo "sed is not installed, bailing out" + exit 1 +fi + +export SED_PROGRAM + +############################################################ +# Create symlinks to source directories +############################################################ +for dir in $SRCDIRS; do + ln -sf ../$dir +done + +############################################################ +# Create files required by automake +############################################################ +cp ../ChangeLog . + +touch AUTHORS + +############################################################ +# Start build +############################################################ +mkdir -p m4 + +# Generate Makefile.am +cat > Makefile.am <<EOF +ACLOCAL_AMFLAGS = -I m4 + +# set some variables by hand for old autoconf versions +builddir = . 
+abs_srcdir = \`cd \$(srcdir) && pwd\` +SED = $SED_PROGRAM + +lib_LTLIBRARIES = libFileStager.la + +libFileStager_la_CXXFLAGS = \$(ROOT_CFLAGS) -D__FSSTANDALONE__ -I\$(builddir)/Root + +libFileStager_la_LDFLAGS = \$(ROOT_LDFLAGS) \$(ROOT_LIBS) + +EXTRA_DIST = Root/LinkDef.h + +bin_PROGRAMS = StageMonitor + +StageMonitor_CXXFLAGS = \$(ROOT_CFLAGS) -D__FSSTANDALONE__ + +StageMonitor_LDFLAGS = \$(ROOT_LDFLAGS) \$(ROOT_LIBS) + +StageMonitor_LDADD = \$(builddir)/libFileStager.la + +nodist_libFileStager_la_SOURCES = \$(builddir)/Root/FileStagerCint.cxx + +EOF + +./generate_filelists.sh >> Makefile.am + +cat >> Makefile.am <<EOF +\$(builddir)/Root/FileStagerCint.cxx: + mkdir -p \$(builddir)/Root + rootcint -f \$(builddir)/Root/FileStagerCint.cxx -c -p -I\$(abs_srcdir) \$(ROOT_CFLAGS) \$(abs_srcdir)/FileStager/T*.h \$(abs_srcdir)/Root/LinkDef.h + +clean-local: + -rm -f \$(builddir)/Root/FileStagerCint.h + -rm -f \$(builddir)/Root/FileStagerCint.cxx + +# Create config script +# + +PACKAGEPREFIX = \$(prefix) +PACKAGENAME = \$(PACKAGE) +PACKAGEVERSION = \$(VERSION) +PACKAGELTLIBS = \$(libdir)/libFileStager.la +PACKAGECFLAGS = -I\$(includedir) +PACKAGEEXTCFLAGS = \$(CPPFLAGS) +PACKAGELDFLAGS = -L\$(libdir) -lFileStager +PACKAGELDADD = "" +PACKAGELIBS = -lFileStager +PACKAGESTATICLIBS = \$(libdir)/libFileStager.a + +# From here on, no changes need to be applied +# + +gen_config = \$(SED) \\ + -e 's,_PACKAGEPREFIX_,\$(PACKAGEPREFIX),g' \\ + -e 's,_PACKAGENAME_,\$(PACKAGENAME),g' \\ + -e 's,_PACKAGEVERSION_,\$(PACKAGEVERSION),g' \\ + -e 's,_PACKAGELTLIBS_,\$(PACKAGELTLIBS),g' \\ + -e 's,_PACKAGECFLAGS_,\$(PACKAGECFLAGS),g' \\ + -e 's,_PACKAGEEXTCFLAGS_,\$(PACKAGEEXTCFLAGS),g' \\ + -e 's,_PACKAGELDFLAGS_,\$(PACKAGELDFLAGS),g' \\ + -e 's,_PACKAGELDADD_,\$(PACKAGELDADD),g' \\ + -e 's,_PACKAGELIBS_,\$(PACKAGELIBS),g' \\ + -e 's,_PACKAGESTATICLIBS_,\$(PACKAGESTATICLIBS),g' + +\$(PACKAGENAME)-config: package-config.in Makefile + \$(gen_config) \$< > \$@ + chmod +x \$@ + +all: \$(PACKAGENAME)-config + +install-exec-local: all + \$(mkinstalldirs) \$(DESTDIR)\$(bindir) + \$(INSTALL) \$(PACKAGENAME)-config \$(DESTDIR)\$(bindir) + +uninstall-local: + rm -f \$(DESTDIR)\$(bindir)/\$(PACKAGENAME)-config + -rmdir \$(DESTDIR)\$(bindir) + +MOSTLYCLEANFILES = \$(PACKAGENAME)-config + +EXTRA_DIST += package-config.in +EOF + +autoreconf -fiv diff --git a/Database/FileStager/build/cleanup.sh b/Database/FileStager/build/cleanup.sh new file mode 100755 index 0000000000000000000000000000000000000000..eb687f7e4e98d7858dfe4bfd43a9317fdcf1d190 --- /dev/null +++ b/Database/FileStager/build/cleanup.sh @@ -0,0 +1,6 @@ +#!/bin/sh + +rm -f FileStager Root bin +rm -rf aclocal.m4 autom4te.cache config.* configure depcomp install-sh ltmain.sh Makefile* missing +rm -f AUTHORS ChangeLog COPYING INSTALL *~ +rm -rf build m4 diff --git a/Database/FileStager/build/configure.ac b/Database/FileStager/build/configure.ac new file mode 100644 index 0000000000000000000000000000000000000000..e9b5e278eb2e7b2363fa67471db47ae5c10f6544 --- /dev/null +++ b/Database/FileStager/build/configure.ac @@ -0,0 +1,46 @@ +AC_INIT(Makefile.am) + +AC_CONFIG_MACRO_DIR([m4]) + +AC_CANONICAL_HOST +AC_CANONICAL_TARGET + +AM_INIT_AUTOMAKE(FileStager,0.1.0) + +AC_PROG_CC +AC_PROG_CXX +AC_LANG(C++) +AC_PROG_LIBTOOL +AM_PROG_LIBTOOL +AC_PROG_INSTALL + +# Check for ROOT +# +AC_CHECK_PROG(ROOT_BIN,[root-config],[yes]) +if test -z "$ROOT_BIN" ; then + AC_MSG_ERROR([root-config not found in PATH]) +fi +if root-config --dummy-nonexistent-param >/dev/null 2>&1; then + 
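+ # an outdated root-config silently accepts unknown options, which would make
+ # the --cflags/--ldflags/--libs probes below meaningless, so refuse it here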
AC_MSG_ERROR([root-config script does not return error on bad parameters. Please update root.]) +fi +if ! root-config --cflags >/dev/null 2>&1; then + AC_MSG_ERROR([root-config script does not parse --cflags correctly]) +fi +if ! root-config --ldflags >/dev/null 2>&1; then + AC_MSG_ERROR([root-config script does not parse --ldflags correctly]) +fi +if ! root-config --libs >/dev/null 2>&1; then + AC_MSG_ERROR([root-config script does not parse --libs correctly]) +fi + +ROOT_CFLAGS="`root-config --cflags`" +ROOT_LDFLAGS="`root-config --ldflags`" +ROOT_LIBS="`root-config --libs`" + +AC_SUBST(ROOT_CFLAGS) +AC_SUBST(ROOT_LDFLAGS) +AC_SUBST(ROOT_LIBS) + +AC_CONFIG_FILES([Makefile]) + +AC_OUTPUT diff --git a/Database/FileStager/build/generate_filelists.sh b/Database/FileStager/build/generate_filelists.sh new file mode 100755 index 0000000000000000000000000000000000000000..bee54dfe4228bef62f9d1a1d08afc44b43428d98 --- /dev/null +++ b/Database/FileStager/build/generate_filelists.sh @@ -0,0 +1,24 @@ +#!/bin/sh + +if [ -z $SED_PROGRAM ]; then + echo "SED_PROGRAM is not set" + exit 1 +fi + +beautify() +{ + while read i; do + echo -n $i | $SED_PROGRAM -e 's,^, \\\n\t,' + done + echo +} + +echo -n "libFileStager_la_SOURCES =" +ls -1 Root/T*.cxx | beautify +echo +echo -n "nobase_include_HEADERS =" +ls -1 FileStager/T*.h | beautify +echo +echo -n "StageMonitor_SOURCES =" +ls -1 bin/*.cxx | beautify +echo diff --git a/Database/FileStager/build/package-config.in b/Database/FileStager/build/package-config.in new file mode 100644 index 0000000000000000000000000000000000000000..8a5d0b31e30fada6e1b9b6bff900ce02f4e6c5fc --- /dev/null +++ b/Database/FileStager/build/package-config.in @@ -0,0 +1,151 @@ +#!/bin/sh + +# +# Analogue to xml2-config +# +# Replace following variables (of course without the whitespace) with e.g. sed: +# _PACKAGEPREFIX _ +# _PACKAGENAME _ +# _PACKAGEVERSION _ +# _PACKAGELTLIBS _ +# _PACKAGECFLAGS _ +# _PACKAGELDFLAGS _ +# _PACKAGELDADD _ +# _PACKAGELIBS _ +# _PACKAGESTATICLIBS _ + +usage() +{ + cat <<EOF +Usage: _PACKAGENAME_-config [OPTION] + +Known values for OPTION are: + + --prefix=DIR change _PACKAGENAME_ prefix [default $prefix] + --exec-prefix=DIR change _PACKAGENAME_ exec prefix [default $exec_prefix] + --ldflags print library flags + --libs print library linking information + --libtool-libs print linking information for use with libtool + --static-libs print static library linking information + --cflags print pre-processor and compiler flags + --help display this help and exit + --version output version information +EOF + + exit $1 +} + +prettyuniqdirs() +{ + list=`prettydirs $@` + cache="" + for i in $list; do + if ! 
echo "$cache"|grep " $i " >/dev/null 2>&1; then + cache="$cache $i "; + fi + done + echo $cache +} +prettydirs() +{ + echo "$@" | sed 's/\/\+/\//g' | sed 's/\/$//' | sed 's/\/\ /\ /g' +} + +if test $# -eq 0; then + usage 1 +fi + +PACKAGEPREFIX="_PACKAGEPREFIX_" +PACKAGENAME="_PACKAGENAME_" +PACKAGEVERSION="_PACKAGEVERSION_" +PACKAGELTLIBS="_PACKAGELTLIBS_" +PACKAGECFLAGS="_PACKAGECFLAGS_" +PACKAGELDFLAGS="_PACKAGELDFLAGS_" +PACKAGELDADD="_PACKAGELDADD_" +PACKAGELIBS="_PACKAGELIBS_" +PACKAGESTATICLIBS="_PACKAGESTATICLIBS_" + +PACKAGEPREFIX_ESC=`prettydirs $PACKAGEPREFIX | sed 's/\\//\\\\\//g'` + +prefix=${PACKAGEPREFIX} +prefix_esc=`prettydirs $prefix | sed 's/\\//\\\\\//g'` +exec_prefix=${prefix} +exec_prefix_esc=`prettydirs $exec_prefix | sed 's/\\//\\\\\//g'` + +includedir=${prefix}/include +libdir=${prefix}/lib + +cflags=false +libs=false + +while test $# -gt 0; do + case "$1" in + -*=*) optarg=`echo "$1" | sed 's/[-_a-zA-Z0-9]*=//'` ;; + *) optarg= ;; + esac + + case "$1" in + --prefix=*) + prefix=`prettydirs $optarg` + prefix_esc=`prettydirs $prefix | sed 's/\\//\\\\\//g'` + includedir=$prefix/include + libdir=$prefix/lib + ;; + + --prefix) + prettydirs ${prefix} + ;; + + --exec-prefix=*) + exec_prefix=`prettydirs $optarg` + exec_prefix_esc=`prettydirs $exec_prefix | sed 's/\\//\\\\\//g'` + libdir=$exec_prefix/lib + ;; + + --exec-prefix) + prettydirs ${exec_prefix} + ;; + + --version) + echo ${PACKAGEVERSION} + exit 0 + ;; + + --help) + usage 0 + ;; + + --cflags) + prettyuniqdirs `echo ${PACKAGECFLAGS} | \ + sed "s/-I${PACKAGEPREFIX_ESC}/-I${prefix_esc}/g"` + ;; + + --libtool-libs) + prettyuniqdirs ${PACKAGELTLIBS} | \ + sed "s/${PACKAGEPREFIX_ESC}/${prefix_esc}/g" + ;; + + --static-libs) + prettyuniqdirs ${PACKAGESTATICLIBS} | \ + sed "s/${PACKAGEPREFIX_ESC}/${prefix_esc}/g" + ;; + + --ldflags) + prettyuniqdirs `echo ${PACKAGELDFLAGS} | \ + sed "s/-L${PACKAGEPREFIX_ESC}/-L${prefix_esc}/g" | \ + sed "s/-L${PACKAGEPREFIX_ESC}/-L${exec_prefix_esc}/g"` ${PACKAGELDADD} + ;; + + --libs) + echo ${PACKAGELIBS} + ;; + + *) + usage 1 + exit 1 + ;; + esac + shift +done + +exit 0 diff --git a/Database/FileStager/build/standalone_build.sh b/Database/FileStager/build/standalone_build.sh new file mode 100755 index 0000000000000000000000000000000000000000..377bfa1a28fcb12f40253268f8f38d0c8af1bd09 --- /dev/null +++ b/Database/FileStager/build/standalone_build.sh @@ -0,0 +1,42 @@ +#!/bin/sh + +if [ $# -eq 0 ]; then + echo "No additional options given, using defaults" +else + echo "Running ./configure $*" +fi + +sleep 1 + +if [ x$ROOTSYS = x ]; then + echo "ROOTSYS not set, exiting" + exit 1 +fi + +if [ ! -x $ROOTSYS/bin/root-config ]; then + echo "root-config not found, exiting" + exit 1 +fi + +# set CFLAGS in order to get proper machine type for libtool +export CFLAGS=`root-config --cflags` + +./bootstrap.sh || exit 1 + +mkdir build + +echo "Running ./configure $@" + +if [ x`uname -s` = xLinux ]; then + NCPUS=`grep -c processor /proc/cpuinfo` +elif [ x`uname -s` = xDarwin ]; then + NCPUS=`sysctl -n hw.ncpu` +fi + +if [ ! 
-z $NCPUS ]; then + echo "Detected ${NCPUS} cpus, using make -j${NCPUS}" + MAKEFLAGS="-j${NCPUS}" + sleep 1 +fi + +(cd build && ../configure $@ && make $MAKEFLAGS && make install) diff --git a/Database/FileStager/cmt/Makefile.Standalone b/Database/FileStager/cmt/Makefile.Standalone new file mode 100644 index 0000000000000000000000000000000000000000..ced78cab3fa8abf8e949fb4cc35c22115dd97736 --- /dev/null +++ b/Database/FileStager/cmt/Makefile.Standalone @@ -0,0 +1,125 @@ +# Author: Max Baak (mbaak@cern.ch) +# FileStager stand-alone makefile + +# ------------------------------------------------------------- +# General flags +# ------------------------------------------------------------- +PACKAGE = FileStager + +OUTPUTDIR = ../StandAlone +#ifneq ($(CMTCONFIG),) +#OUTPUTDIR = ../$(CMTCONFIG) +#endif + +CC = g++ +CCFLAGS = -g -m32 -Wall -W -Woverloaded-virtual -Wno-parentheses -Wno-unused-parameter -Wno-unused-variable +LDFLAGS = -g -m32 +#CCFLAGS = -g -m64 -fPIC -Wall -W -Woverloaded-virtual -Wno-parentheses -Wno-unused-parameter -Wno-unused-variable +#LDFLAGS = -g -m64 +MFLAGS = -MM +WORKDIR = ../tmp +INCLUDES += -I. -I../ -I../FileStager/ -I../Root/ -I$(WORKDIR) -I$(ROOTSYS)/include +LINKLIBS = -L${ROOTSYS}/lib -lRIO -lNet -lCore -lCint -lTree -lHist -lMatrix -lPhysics -ldl -lm +#-lRIO -lNet needed for root >v5.16 +CCFLAGS += -D__FSSTANDALONE__ + +# for cross-compilation. ATLAS env sets CPPEXPFLAGS if it is needed +# If you want to cross-compile standalone just set CPPEXPFLAGS, for host-slc6&target-slc5 with gcc43 it is -D__USE_XOPEN2K8 +ifneq ($(strip $(CPPEXPFLAGS)),) +CCFLAGS += $(CPPEXPFLAGS) +endif + + +# ------------------------------------------------------------- +# ROOT Cint +# ------------------------------------------------------------- +CCLISTT = TStagerInfo.cxx TStageFileInfo.cxx TStageManager.cxx TCopyChain.cxx TCopyFile.cxx +HHLISTT = TStagerInfo.h TStageFileInfo.h TStageManager.h TCopyChain.h TCopyFile.h +CCLIST = $(addprefix ../Root/,$(CCLISTT)) +HHLIST = $(addprefix ../FileStager/,$(HHLISTT)) +HHLISTC = $(HHLIST) + +LDEFFILE = ../Root/LinkDef.h +CINTFILE = ../Root/FileStagerCint.cxx +CINT = stagercint +CINTOBJ = ../Root/FileStagerCint.o +HHLISTC += ../Root/FileStagerCint.h + +OLIST = $(patsubst %.cxx,%.o,$(CCLIST)) +OLISTC = $(OLIST) +OLISTC += $(CINTOBJ) +DLIST = $(patsubst %.h,%.d,$(HHLISTC)) + +# ------------------------------------------------------------- +# StageMonitor executable +# ------------------------------------------------------------- +SMCC = ../bin/StageMonitor.cxx +SMO = ../bin/StageMonitor.o +SMFILE = StageMonitor +SMFILEEXE = $(OUTPUTDIR)/$(SMFILE).exe + +# ------------------------------------------------------------- +# Libraries +# ------------------------------------------------------------- +LIBFILE = $(OUTPUTDIR)/lib$(PACKAGE)Lib.a +SHLIBFILE = $(OUTPUTDIR)/lib$(PACKAGE)Lib.so + +# ------------------------------------------------------------- +# Compilation +# ------------------------------------------------------------- + +default: $(SMFILE) $(CINT) $(SHLIBFILE) clean + +# Implicit rule making all dependency Makefiles included at the end of this makefile +%.d: %.cxx $(HHLIST) + @set -e; $(CC) $(MFLAGS) $(CCFLAGS) $(INCLUDES) $< \ + | awk '{ sub("^$(notdir $*).o:","$*.o $@:") ; print }' > $@ ;\ + [ -s $@ ] || rm -f $@ + +# Implicit rule to compile all classes +%.o: %.cxx + @echo "Compiling $<" + $(CC) $(CCFLAGS) -c $< -o $*.o $(INCLUDES) + +# Rule to make StageMonitor +$(SMFILE) : $(SMO) + @echo "Making $(SMFILEEXE)" + @mkdir -p $(OUTPUTDIR) + 
$(CC) $(CCFLAGS) -o $(SMFILEEXE) $(SMO) $(LINKLIBS) + +# Rule to make ROOTCINT output file +$(CINT) : $(HHLIST) + @echo "Running rootcint" + $(ROOTSYS)/bin/rootcint -f $(CINTFILE) -c -p $(INCLUDES) $(HHLIST) $(LDEFFILE) + $(CC) $(CCFLAGS) -c $(CINTFILE) -o $(CINTOBJ) $(INCLUDES) + +## Rules to combine objects into a executable +#$(BINFILE): $(OLIST) ../bin/$(BINFILE).o +# $(CC) $(LDFLAGS) -o $(BINFILEEXE) $(OLIST) ../bin/$(BINFILE).o $(LINKLIBS) + +# Rule to combine objects into a library +$(LIBFILE): $(OLISTC) + @echo "Making $(LIBFILE)" + @mkdir -p $(OUTPUTDIR) + @rm -f $(LIBFILE) + @ar q $(LIBFILE) $(OLISTC) + @ranlib $(LIBFILE) + +# Rule to combine objects into a shared library +$(SHLIBFILE): $(OLISTC) + @echo "Making $(SHLIBFILE)" + @mkdir -p $(OUTPUTDIR) + @rm -f $(SHLIBFILE) + @$(CC) $(CCFLAGS) $(OLISTC) -shared -o $(SHLIBFILE) $(LINKLIBS) + +-include $(DLIST) + +# Useful build targets +lib: $(LIBFILE) +shlib: $(SHLIBFILE) +cint: $(CINT) +clean: + @rm -f ../Root/*.o ../Root/*.d ../bin/*.o ../bin/*.d + +.PHONY : shlib lib default clean + diff --git a/Database/FileStager/cmt/requirements b/Database/FileStager/cmt/requirements new file mode 100644 index 0000000000000000000000000000000000000000..3f5dfb900c433796699c7208ed10040a80ab06ee --- /dev/null +++ b/Database/FileStager/cmt/requirements @@ -0,0 +1,47 @@ +package FileStager + +author Max Baak <mbaak@cern.ch> + +use AtlasPolicy AtlasPolicy-01-* +use GaudiInterface GaudiInterface-01-* External +use AthenaKernel AthenaKernel-* Control +use AtlasROOT AtlasROOT-* External +use AthenaPoolKernel AthenaPoolKernel-* Database/AthenaPOOL + +## macros +apply_tag ROOTBasicLibs +apply_tag ROOTMathLibs +apply_tag ROOTSTLDictLibs + +## build libraries +apply_pattern dual_use_library files="../Root/*.cxx ../src/*.cxx ../src/components/*.cxx" +macro FileStagerLib_dependencies "filestager_root_dictionary" +action filestager_root_dictionary "( make -f Makefile.Standalone stagercint clean )" + +### Create a named installed library +#library FileStagerLib "../Root/*.cxx" +#apply_pattern named_installed_library library=FileStagerLib +#macro FileStagerLib_dependencies "filestager_root_dictionary" +#action filestager_root_dictionary "( make -f Makefile.Standalone stagercint )" +# +### Create a component library +#library FileStager *.cxx components/*.cxx +##library FileStager "../src/*.cxx ../components/*.cxx" +#macro_append FileStager_dependencies " FileStagerLib" +#apply_pattern component_library + +## applications +macro_append StageMonitor_dependencies " FileStagerLib " +application StageMonitor "../bin/StageMonitor.cxx" + +apply_pattern declare_scripts files="../scripts/define_* ../scripts/interpret_* ../scripts/wrapper_*" +apply_pattern declare_joboptions files="*.py" +apply_pattern declare_python_modules files="*.py" + +private +#macro cppdebugflags '$(cppdebugflags_s)' +#macro_remove componentshr_linkopts "-Wl,-s" +## c++ flags +#macro cppflags " -g -Wall -W -Woverloaded-virtual -Wno-unused-parameter -Wno-unused-variable -I$(ROOTSYS)/include -I../Root -IRoot" +#macro cpplinkflags " -L${ROOTSYS}/lib -lRIO -lNet -lCore -lCint -lTree -lHist -lMatrix -lPhysics -ldl " + diff --git a/Database/FileStager/python/FileStagerConfig.py b/Database/FileStager/python/FileStagerConfig.py new file mode 100644 index 0000000000000000000000000000000000000000..6ad9e3dbc4766c36629cb17bb86fc412e993e876 --- /dev/null +++ b/Database/FileStager/python/FileStagerConfig.py @@ -0,0 +1,67 @@ +# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration + +from 
AthenaCommon.Configurable import ConfigurableAlgorithm +from AthenaCommon.Constants import * # for "INFO" + +class FileStagerAlg( ConfigurableAlgorithm ): + # slots is a sequence of properties that the use is allowed to set + # either provide a tuple/list with names, like so: + # + # __slots__ = [ 'MyInt', 'MyBool', 'MyDouble', 'MyStringVec' ] + # + # or provide a dictionary with names and initial (error) values: + __slots__ = [ 'PipeLength', 'VerboseStager', 'VerboseWaiting', 'FirstFileAlreadyStaged' + 'TreeName', 'InfilePrefix', 'OutfilePrefix', + 'CpCommand', 'BaseTmpdir', 'CpArguments', + 'InputCollections', 'OutputCollections', + 'LogfileDir', 'KeepLogfiles', 'StoreStatistics' ] + + def __init__( self, name = "FileStagerAlg" ): + # Base class init MUST be called + super( FileStagerAlg, self ).__init__( name ) + + def getServices( self ): + # this method is required: it tells the framework which + # services are needed to run this algorithm successfully + # Note: the following services are not really needed, but + # it does allow to show how a couple of parameters can be + # set for these services (see below, configureXYZ) + return [ "MessageSvc", "ApplicationMgr" ] + + def getDlls( self ): + # this method is required: it tells the framework where the + # FileStagerAlg algorithm is located (in libFileStager.so) + return "FileStager" + + def getType( cls ): + return "FileStagerAlg" + + def setDefaults( cls, hello ): + # this method is required: it sets the defaults for the + # FileStagerAlg algorithm + # Note: if the user has already specified values for any of + # these properties, than those values are not overwritten + hello.PipeLength = 1 + hello.VerboseStager = False + hello.VerboseWaiting = True + hello.FirstFileAlreadyStaged = True + hello.TreeName = "CollectionTree" + hello.InfilePrefix = "gridcopy://" + hello.OutfilePrefix = "file:" + hello.CpCommand = "lcg-cp" + hello.CpArguments = "-v" + hello.CpArguments = "--vo" + hello.CpArguments = "atlas" + hello.LogfileDir = "" + hello.KeepLogfiles = False + hello.StoreStatistics = False + + def configureMessageSvc( self, msgsvc ): + # this method is optional: it is used to configure the + # MessageSvc, and will only be called if "MessageSvc" is in + # the list obtained from getServices + # Note: in general you don't want to touch top level svcs + # here, and you should configure FileStagerAlg.OutputLevel in + # setDefaults() instead + msgsvc.OutputLevel = WARNING + diff --git a/Database/FileStager/python/FileStagerTool.py b/Database/FileStager/python/FileStagerTool.py new file mode 100644 index 0000000000000000000000000000000000000000..6cf3df6808dd8adb0333957c77f215e983da8221 --- /dev/null +++ b/Database/FileStager/python/FileStagerTool.py @@ -0,0 +1,232 @@ +# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration + +import os +import sys, string, commands +from datetime import datetime + +## Needed to correct ROOT behavior; see below +CWD = os.getcwd() +import ROOT +## Importing gSystem may change the current directory to one of the +## command-line arguments; chdir to original directory to have +## predictable behavior +from ROOT import gSystem +os.chdir(CWD) +## Import the ROOT library for reading han results +gSystem.Load('libFileStagerLib') +from ROOT import TStageManager + +class FileStagerTool: + def __init__(self, sampleFile="", baseDir=".", tmpDir="", + CpCommand = "lcg-cp", + CpArguments = [ "-v", "--vo", "atlas", "-t", "1200" ], + InfilePrefix = "gridcopy://", + OutfilePrefix = "file:", + StageFirstFile = 
True, + sampleList = [], + checkGridProxy = True, + tryGridRestart = True, + gridCertificate = os.environ['HOME'] + "/.globus/gridproxy.cert", + LogfileDir = "" + ): + + self.sampleFile = sampleFile + self.sampleList = sampleList + self.tmpSampleList = [] + + self.CpCommand = CpCommand + self.CpArguments = CpArguments + self.InfilePrefix = InfilePrefix + self.OutfilePrefix = OutfilePrefix + self.StageFirstFile = StageFirstFile + + self.checkGridProxy = checkGridProxy and (CpCommand == "lcg-cp") + self.tryGridRestart = tryGridRestart + self.gridCertificate = gridCertificate + + if ( len(LogfileDir)>0 ): + if ( LogfileDir.rfind("/")!=(len(LogfileDir)-1) ): + LogfileDir+="/" + self.LogfileDir = LogfileDir + + if ( baseDir.rfind("/")!=(len(baseDir)-1) ): + baseDir+="/" + self.baseDir = baseDir + + if ( len(tmpDir)>0 ): + if ( tmpDir.rfind("/")!=(len(tmpDir)-1) ): + tmpDir+="/" + self.tmpDir = tmpDir + + # process sample file + self.ProcessSampleFile() + + def ProcessSampleFile(self): + + if (len(self.sampleFile)==0): return + + self.sampleList = [] + FileList=open(self.sampleFile).readlines() + + for file in FileList: + file = file.strip() + + if (len(file)==0): + continue + elif (file.find("#")==0): + continue + elif (file.find("TITLE")==0): + continue + elif (file.find("FLAGS")==0): + continue + else: + self.sampleList.append(file) + + def PrintSampleList(self,outputFile=""): + printlist = "sampleList = %s" % self.sampleList + if (len(outputFile)>0): + outFile = open(outputFile,'w') + outFile.write(printlist) + outFile.close() + print "Sample list has been written to <%s>. Goodbye.\n" % outputFile + else: print printlist + "\n" + + def GetSampleList(self): + return self.sampleList + + def ListFromLocal(self,localDir): + os.system("ls -l "+localDir+" > /tmp/filelist.temp") + FileList=open("/tmp/filelist.temp","r").readlines() + FileList1=[localDir+"/"+file.split()[8] + for file in FileList + if file.split()[4]!="0"] + os.system("rm -f /tmp/filelist.temp") + return FileList1 + + def ListFromCastor(self,castorDir): + os.system("nsls -l "+castorDir+" > /tmp/filelist.temp") + FileList=open("/tmp/filelist.temp","r").readlines() + FileList1=["rfio:"+castorDir+file.split()[8] + for file in FileList + if file.split()[4]!="0"] + os.system("rm /tmp/filelist.temp") + return FileList1 + + def GetTmpdir(self): + # default + defaultTmpdir = "" + + # last fallback + try: + defaultTmpdir = os.environ['TMPDIR'] + except Exception,inst: + pass + # cern lxbatch + try: + defaultTmpdir = os.environ['WORKDIR'] + except Exception,inst: + pass + # osg + try: + defaultTmpdir = os.environ['OSG_WN_TMP'] + except Exception,inst: + pass + # lcg + try: + defaultTmpdir = os.environ['EDG_WL_SCRATCH'] + except Exception,inst: + pass + + # use given tmpdir + if (len(self.tmpDir)>0): defaultTmpdir = self.tmpDir + + # fall back option + if (len(defaultTmpdir)==0): defaultTmpdir="/tmp" + + return defaultTmpdir + + def GetStageCollections(self): + baseTmpdir = self.GetTmpdir() + + if ( baseTmpdir.rfind("/")!=(len(baseTmpdir)-1) ): + baseTmpdir+="/" + + self.tmpSampleList = [] + + for input in self.sampleList: + filePiece = input.split("/") + nPieces = len(filePiece) + tmpFile = baseTmpdir + "tcf_" + filePiece[nPieces-1] + self.tmpSampleList.append(tmpFile) + + if (self.StageFirstFile): self.StageFirstCollection() + + return self.tmpSampleList + + def StageFirstCollection(self): + + # stage first file ... 
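+    # The first collection is fetched synchronously through
+    # TStageManager.getFile(), so it is expected to be on local disk before
+    # the event loop opens it; the remaining collections can then be staged
+    # in the background while the first file is being processed.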
+ if(len(self.sampleList)>=1): + rfioPiece = self.sampleList[0].split(self.InfilePrefix) + nPieces = len(rfioPiece) + + inFile = rfioPiece[nPieces-1] + outFile = self.tmpSampleList[0] + stderr = outFile+"_stage.err" + stdout = outFile+"_stage.out" + + if (len(self.LogfileDir)>0): + baseTmpdir = self.GetTmpdir() + if ( baseTmpdir.rfind("/")!=(len(baseTmpdir)-1) ): baseTmpdir+="/" + stderr = stderr.replace(baseTmpdir,self.LogfileDir) + stdout = stdout.replace(baseTmpdir,self.LogfileDir) + + #print "TStageManager::getFile() : Waiting till <%s> is staged." % (self.sampleList[0]) + + if (True): + stageman = TStageManager.instance() + stageman.setPidInLognames(False) + stageman.setLogfileDir(self.LogfileDir) + stageman.setBaseTmpdir(self.GetTmpdir()) + stageman.setInfilePrefix(self.InfilePrefix) + stageman.setOutfilePrefix(self.OutfilePrefix) + stageman.setCpCommand(self.CpCommand) + for addArg in self.CpArguments: + stageman.addCpArg(addArg) + stageman.getFile(self.sampleList[0]) + stageman.setPidInLognames(True) + #else: + # sysCommand = "time "+self.CpCommand + # for addArg in self.CpArguments: + # sysCommand += " " + addArg + # sysCommand += " "+inFile+" "+self.OutfilePrefix+outFile+" >"+stdout+" 2>"+stderr + # os.system(sysCommand) + + def DoStaging(self): + doStaging = False + if (len(self.sampleList)>0): + if(self.sampleList[0].find(self.InfilePrefix)!=-1): + doStaging = True + + if (doStaging and self.checkGridProxy): + retcode = self.CheckGrid() + if (retcode!=0): + if (self.tryGridRestart): + retcode2 = self.TryGridRestart() + if (retcode2!=0): sys.exit(1) + else: sys.exit(1) + return doStaging + + def CheckGrid(self): + (retcode,output) = commands.getstatusoutput("grid-proxy-info -exists") + if (retcode!=0): + print "\nFileStager.FileStagerTool() : ERROR : grid proxy certificate not found." + return retcode + + def TryGridRestart(self): + #os.environ['X509_USER_PROXY'] = self.gridCertificate + (retcode,output) = commands.getstatusoutput("voms-proxy-init -voms atlas -noregen") + if (retcode!=0): + print "\nFileStager.FileStagerTool() : ERROR : grid proxy restart failed. Exiting." 
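+    # A non-zero code from voms-proxy-init is handed back to DoStaging(),
+    # which aborts the job with sys.exit(1) when the proxy cannot be renewed.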
+ return retcode + diff --git a/Database/FileStager/python/__init__.py b/Database/FileStager/python/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..dce5ff5afd85615f0ef8ad9eab35f094ffbbd7cd --- /dev/null +++ b/Database/FileStager/python/__init__.py @@ -0,0 +1,10 @@ +# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration + +# File: FileStager/__init__.py +# Author: Max Baak (mbaak@cern.ch) + +__version__ = '1.0.0' +__author__ = 'Max Baak (mbaak@cern.ch)' + +__all__ = [ 'FileStagerConfig' ] + diff --git a/Database/FileStager/run/stagerExamplePyRoot.py b/Database/FileStager/run/stagerExamplePyRoot.py new file mode 100644 index 0000000000000000000000000000000000000000..41c6dc53600f8cbaf26c7e31db61136f487d370a --- /dev/null +++ b/Database/FileStager/run/stagerExamplePyRoot.py @@ -0,0 +1,97 @@ +# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration + +# PyRoot example, by Till Eifert + +import os +import sys, string, commands +from datetime import datetime + +## Needed to correct ROOT behavior; see below +CWD = os.getcwd() +import ROOT +## Importing gSystem may change the current directory to one of the +## command-line arguments; chdir to original directory to have +## predictable behavior +from ROOT import gSystem, gROOT +os.chdir(CWD) +## Import the ROOT library for reading han results +gSystem.Load('libFileStagerLib') +from ROOT import TStageManager, TCopyFile, TCopyChain + + + +def init(): + # Load the filestager dictionary + gSystem.Load("../${CMTCONFIG}/libFileStagerLib.so") + # make sure ROOT picks up TCopyFile when filename starts with gridcopy:// + gROOT.GetPluginManager().AddHandler("TFile", "^gridcopy:", "TCopyFile","TCopyFile_cxx", "TCopyFile(const char*,Option_t*,const char*,Int_t)") + pass + + + +def Example(sample, doStaging=True, treeName="CollectionTree" ) : + # load the filestager library, configure root + init() + # + # EXAMPLE: RFIO + if (doStaging) : + # turn on staging functionality + TCopyChain.SetOriginalTChain(False) + TCopyFile.SetOriginalTFile(False) + # stager settings + manager = TStageManager.instance() + manager.setInfilePrefix("gridcopy://") + manager.setCpCommand("rfcp") + # by default manager stores in $TMPDIR, or else /tmp ifndef + #manager.setBaseTmpdir("/tmpdir") + #manager.setPipeLength(1) + # turn on verbosity + # manager.verbose() # lots of output + manager.verboseWait() # useful to see if your next has not yet finished staging + pass + # + # --- Find and open sample definition file --- + # + f = file(sample, "r") + if (not f) : + print "Error opening sample definition file <%s>" % (defFileName) + exit() + pass + # + ntupleChain = TCopyChain(treeName) + # + for line in f: + ls = line.strip() + if ls.find("#") == 0 : continue + if ls.find("//") ==0 : continue + if (len(ls) == 0) : continue + if ls.find("TITLE") != -1 : continue + if ls.find("FLAGS") != -1 : continue + print "Adding file <%s>" %(ls) + ntupleChain.Add(ls) + pass + # + print "Done adding samples." + # + # Loop over chain + nentries = ntupleChain.GetEntriesFast() + jentry=0 + nb=0 + # + for i in xrange(nentries) : + ientry = ntupleChain.LoadTree(jentry) #in case of a TChain, ientry is the entry number in the current file + isNewTree = (ientry==0) + if (ientry < 0) : break + if jentry % 1000 == 0 : + print "Now processing event %i" % jentry + ## use this to retrieve events. 
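+        ## LoadTree() returns the entry number inside the file the chain has
+        ## currently open, so ientry==0 (isNewTree) marks the first event read
+        ## from a freshly staged file.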
+ #size = ntupleChain.GetEntry(jentry) + #if (size > 0) : + # nb += size + #else : + # print "Framework: end of tree reached, stopping event processing loop" + # break + # + jentry += 1; + pass + diff --git a/Database/FileStager/run/stagerExampleRoot.C b/Database/FileStager/run/stagerExampleRoot.C new file mode 100644 index 0000000000000000000000000000000000000000..80cd9548fa8aacf7b55837ddec0ee3c1ebe73acc --- /dev/null +++ b/Database/FileStager/run/stagerExampleRoot.C @@ -0,0 +1,159 @@ +/* + Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration +*/ + + +#include <stdio.h> +#include <iostream> + +//#include "Root/TCopyChain.h" +//#include "Root/TCopyFile.h" +//#include "Root/TStageManager.h" + +#include <TROOT.h> +#include <TPluginManager.h> +//#include <TChain.h> +#include <TTree.h> +#include <string> +#include <sstream> +#include <fstream> +#include <list> + +using namespace std; + + +void stagerExampleRoot() +{ +} + +void +rootinit(void) +{ + // Load the filestager dictionary + gSystem->Load("../$CMTCONFIG/libFileStagerLib.so"); + + // make sure ROOT picks up TCopyFile when filename starts with gridcopy:// + gROOT->GetPluginManager()->AddHandler("TFile", "^gridcopy:", "TCopyFile","TCopyFile_cxx", + "TCopyFile(const char*,Option_t*,const char*,Int_t)"); +} + + +void +Example(const char* sample, bool doStaging=true, const char* treeName="CollectionTree" ) +{ + // load the filestager library, configure root + rootinit(); + + + /* + // EXAMPLE: LCG-CP + if (doStaging) { + // turn on staging functionality + TCopyChain::SetOriginalTChain(false); + TCopyFile::SetOriginalTFile(false); + + // stager settings + TStageManager& manager = TStageManager::instance(); + manager.setInfilePrefix("gridcopy://"); + manager.setCpCommand("wrapper_lcg-cp"); + //by default manager stores in $TMPDIR, or else /tmp ifndef + //manager.setBaseTmpdir("/tmpdir"); + //manager.setPipeLength(1); + + //// useful for interactive running + //// this option keeps stages files, and checks for files from previous stage. + //manager.setInteractive(); + + // turn on verbosity + if (kFALSE) manager.verbose(); // lots of output + if (kTRUE) manager.verboseWait(); // useful to see only if your next has not yet finished staging + + // useful for interactive running + // this option keeps stages files, and checks for files from previous stage. + //manager.setInteractive(); + } + */ + + //EXAMPLE: RFIO + if (doStaging) { + // turn on staging functionality + TCopyChain::SetOriginalTChain(false); + TCopyFile::SetOriginalTFile(false); + + // stager settings + TStageManager& manager = TStageManager::instance(); + manager.setInfilePrefix("gridcopy://"); + manager.setCpCommand("rfcp"); + //by default manager stores in $TMPDIR, or else /tmp ifndef + //manager.setBaseTmpdir("/tmpdir"); + //manager.setPipeLength(1); + + // turn on verbosity + if (kFALSE) manager.verbose(); // lots of output + if (kTRUE) manager.verboseWait(); // useful to see only if your next has not yet finished staging + + // useful for interactive running + // this option keeps stages files, and checks for files from previous stage. 
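+    // (setInteractive() keeps the staged copies on disk and re-uses files
+    //  left over from a previous staging run instead of copying them again.)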
+ //manager.setInteractive(); + } + + + // --- Find and open sample definition file --- + TString defFileName(sample) ; + ifstream ifs(defFileName) ; + if (!ifs) { + cout << "Error opening sample definition file <" << defFileName << ">" << endl ; + exit(2); + return 0; + } + + TCopyChain *ntupleChain = new TCopyChain(treeName); + + char buf[1024]; + + while (!ifs.eof()) { + ifs.getline(buf,sizeof(buf),'\n'); + + if (strncmp(buf,"#",1)==0) continue; + if (strncmp(buf,"//",2)==0) continue; + string bufstr(buf); + if (bufstr.empty()) continue; + + if (bufstr.find("TITLE")!=std::string::npos) continue; + if (bufstr.find("FLAGS")!=std::string::npos) continue; + + cout << "Adding file <" << bufstr << ">" << endl ; + ntupleChain->Add(buf) ; + } + + cout << "Done adding samples.\n" << endl; + + + + // Loop over chain + Long64_t nentries = ntupleChain->GetEntriesFast(); + Long64_t jentry=0; + Int_t nb=0; + + while(jentry<nentries) { + + Int_t ientry = ntupleChain->LoadTree(jentry); //in case of a TChain, ientry is the entry number in the current file + Bool_t newTree = (ientry==0) ; + if (ientry < 0) break; + + if (jentry % 1000 == 0) { + cout << "Now processing event " << jentry << endl ; + } + +/* // use this to retrieve events. + int size = ntupleChain->GetEntry(jentry) ; + if (size > 0) { + nb += size; + } else { + cout << "Framework: end of tree reached, stopping event processing loop" << endl ; + break; + } +*/ + ++jentry; + } +} diff --git a/Database/FileStager/scripts/create_poolCatalog b/Database/FileStager/scripts/create_poolCatalog new file mode 100755 index 0000000000000000000000000000000000000000..6623cb40434119ad13ac09f96a34b86e347ef231 --- /dev/null +++ b/Database/FileStager/scripts/create_poolCatalog @@ -0,0 +1,41 @@ +#!/usr/bin/env python +# +# usage: create_poolCatalog <samplefile> + +import sys +import os +import string, commands +from datetime import datetime + +def CreatePoolCatalog(sampleFile): + cmd = "pool_insertFileToCatalog " + FileList=open(sampleFile,"r").readlines() + for file in FileList: + file = file.strip() + if (file.find("#")==0): continue + elif (file.find("TITLE")==0): continue + elif (file.find("FLAGS")==0): continue + else: + cmd += file + " " + os.system(cmd) + return + +## main +if __name__ == "__main__": + + usageline = "usage: " + sys.argv[0] + " <samplefile>" + if (len(sys.argv)<2): + print usageline + sys.exit(1) + + if not os.access(sys.argv[1],os.F_OK): + print "ERROR: sample definition file <%s> not found. Exit." 
% sys.argv[1] + print usageline + sys.exit(1) + + sampleFile = sys.argv[1] + + print "# Run: %s %s" % (sys.argv[0],sampleFile) + + CreatePoolCatalog(sampleFile) + diff --git a/Database/FileStager/scripts/define_castor_sample b/Database/FileStager/scripts/define_castor_sample new file mode 100755 index 0000000000000000000000000000000000000000..ab38a6af88d6fb8bdae77ef2c2ee578c09b273f2 --- /dev/null +++ b/Database/FileStager/scripts/define_castor_sample @@ -0,0 +1,124 @@ +#!/usr/bin/env python +# +# usage: define_castor_sample [-n <sampleName>] <castorDirectory> [<numberCollectionsPerFile>] [<gridcopyPrefix>] +# + +import sys +import os +import string, commands +from datetime import datetime + + +def ListFromCastor(prefix,castorDir): + searchcmd = "nsls -l "+castorDir + os.system(searchcmd+ " > /tmp/filelist.temp") + FileList=open("/tmp/filelist.temp","r").readlines() + FileList1=[prefix+castorDir+file.split()[8] + for file in FileList + if file.split()[4]!="0"] + os.system("rm /tmp/filelist.temp") + return FileList1,searchcmd + + +def writeSampleFile(sampleName,filename,filelist,searchcmd="",noCollPerFile=-1,printFlags=True): + + fileNames = [] + timenow = datetime.today().strftime("%d%b%y.%Hh%M") + + # multiple files + if (noCollPerFile>0): + fidx = 0 + idx = 0 + while(idx<len(filelist)): + if (idx % noCollPerFile == 0): + if (fidx!=0): file.close() + filenameI = filename.replace(".def","_"+str(fidx)+".def") + file = open(filenameI,'w') + file.write("# created "+timenow+"\n") + file.write("# searchcmd: "+searchcmd+"\n") + file.write("TITLE: "+sampleName+"_"+str(fidx)+"\n") + file.write("FLAGS: GridCopy=1"+"\n") + fileNames.append(filenameI) + fidx += 1 + file.write(filelist[idx] + "\n") + idx += 1 + file.close() + # one file + else: + file = open(filename,'w') + fileNames.append(filename) + if (printFlags): + file.write("# created "+timenow+"\n") + file.write("# searchcmd: "+searchcmd+"\n") + file.write("TITLE: "+sampleName+"\n") + file.write("FLAGS: GridCopy=1"+"\n") + for line in filelist: + file.write(line + "\n") + file.close() + + return fileNames + + +## main +if __name__ == "__main__": + + (retcode,output) = commands.getstatusoutput("which rfdir") + if (retcode!=0): + print sys.argv[0], "ERROR: rfdir not found, cannot search for files." 
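+        # rfdir is only probed to confirm that the CASTOR client tools are
+        # set up; the actual directory listing is done with "nsls -l" in
+        # ListFromCastor() above.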
+ sys.exit(1) + + if (len(sys.argv)<2): + print "usage: ", sys.argv[0], "[-n <sampleName>] <castorDirectory> [<numberCollectionsPerFile>] [<gridcopyPrefix>]" + sys.exit(1) + + sampleName = "" + idx = 0 + + if (sys.argv[1] == "-n"): + if (len(sys.argv)<4): + print "usage: ", sys.argv[0], "[-n <sampleName>] <castorDirectory> [<numberCollectionsPerFile>] [<gridcopyPrefix>]" + sys.exit(1) + else: + sampleName = sys.argv[2] + idx = 2 + + print "Running: ", sys.argv[0] + + # castor directory + castorDir = sys.argv[idx+1] + while ( len(castorDir)>=1 and (castorDir.rfind("/")==(len(castorDir)-1)) ): + castorDir = castorDir.rstrip("/") + castorDir+="/" + print ">> castor directory: ", castorDir + + # sample + if (len(sampleName)==0): + castorSplit = castorDir.split("/") + sampleName = castorSplit[len(castorSplit)-2] + if (len(sampleName)==0): + sampleName = "default" + sampleDir = "samples/" + os.system("mkdir -p "+sampleDir) + sampleFile = sampleDir+sampleName+"_rfio.def" + + # number of collections per file + noCollPerFile = -1 + if (len(sys.argv) >= idx+3): + noCollPerFile = int(sys.argv[idx+2]) + + # prefix + infileprefix = "gridcopy://" + if (len(sys.argv) >= idx+4): + infileprefix = sys.argv[idx+3] + + # no of collections + (fileList,searchcmd) = ListFromCastor(infileprefix,castorDir) + print ">> collections found: ", len(fileList) + + # write output to file + if (len(fileList)>0): + sampleFiles = writeSampleFile(sampleName,sampleFile,fileList,searchcmd,noCollPerFile) + print ">> max. collections/file: ", noCollPerFile, " (-1 = inf.)" + print ">> filename prefix: ", infileprefix + print ">> sample output file(s): \n", sampleFiles + diff --git a/Database/FileStager/scripts/define_dq2_sample b/Database/FileStager/scripts/define_dq2_sample new file mode 100755 index 0000000000000000000000000000000000000000..028794adf74fff29559dcc79c9065c67b6aca2a3 --- /dev/null +++ b/Database/FileStager/scripts/define_dq2_sample @@ -0,0 +1,157 @@ +#!/usr/bin/env python +# +# usage: define_dq2_sample [-n <sampleName>] <dq2-sample> [<numberCollectionsPerFile>] [<dq2-destination>] [<gridcopyPrefix>] +# +# see list of dq2 destinations with cmd: dq2-destinations +# + +import sys +import os +import string, commands +from datetime import datetime +import socket + +from dq2.info import TiersOfATLAS + +def ListFromDq2(infileprefix,dq2destiny,dq2sample): + searchcmd = "dq2-ls -L "+dq2destiny+ " -p -f "+dq2sample + os.system(searchcmd+" | grep root | grep -v tgz > /tmp/filelist.temp") + #print searchcmd+" > /tmp/filelist.temp" + FileList=open("/tmp/filelist.temp","r").readlines() + FileList1=[] + for file in FileList: + file = file.strip() + if (file.find("srm://")==0): + gridcopyfile = infileprefix+file + FileList1.append(gridcopyfile) + os.system("rm /tmp/filelist.temp") + return FileList1,searchcmd + + +def writeSampleFile(sampleName,filename,filelist,searchcmd="",noCollPerFile=-1,printFlags=True): + + fileNames = [] + timenow = datetime.today().strftime("%d%b%y.%Hh%M") + + # multiple files + if (noCollPerFile>0): + fidx = 0 + idx = 0 + while(idx<len(filelist)): + if (idx % noCollPerFile == 0): + if (fidx!=0): file.close() + filenameI = filename.replace(".def","_"+str(fidx)+".def") + file = open(filenameI,'w') + file.write("# created "+timenow+"\n") + file.write("# searchcmd: "+searchcmd+"\n") + file.write("TITLE: "+sampleName+"_"+str(fidx)+"\n") + file.write("FLAGS: GridCopy=1"+"\n") + fileNames.append(filenameI) + fidx += 1 + file.write(filelist[idx] + "\n") + idx += 1 + file.close() + # one file + else: + file = 
open(filename,'w') + fileNames.append(filename) + if (printFlags): + file.write("# created "+timenow+"\n") + file.write("# searchcmd: "+searchcmd+"\n") + file.write("TITLE: "+sampleName+"\n") + file.write("FLAGS: GridCopy=1"+"\n") + for line in filelist: + file.write(line + "\n") + file.close() + + return fileNames + + +## main +if __name__ == "__main__": + + (retcode,output) = commands.getstatusoutput("which dq2-ls") + if (retcode!=0): + print sys.argv[0], "ERROR: dq2-ls not found, be sure to run DQ2 setup script first." + sys.exit(1) + + if (len(sys.argv)<2): + print "usage: ", sys.argv[0], "[-n <sampleName>] <dq2-sample> [<numberCollectionsPerFile>] [<dq2-destination>] [<gridcopyPrefix>]" + print "(to see list of dq2-destinations, use: dq2-destinations)" + sys.exit(1) + + sampleName = "" + idx = 0 + + if (sys.argv[1] == "-n"): + if (len(sys.argv)<4): + print "usage: ", sys.argv[0], "[-n <sampleName>] <dq2-sample> [<numberCollectionsPerFile>] [<dq2-destination>] [<gridcopyPrefix>]" + print "(to see list of dq2-destinations, use: dq2-destinations)" + sys.exit(1) + else: + sampleName = sys.argv[2] + idx = 2 + + print "Running: ", sys.argv[0] + + # dq2 directory + dq2sample = sys.argv[idx+1] + #while ( len(dq2sample)>=1 and (dq2sample.rfind("/")==(len(dq2sample)-1)) ): + # dq2sample = dq2sample.rstrip("/") + print ">> dq2 sample: ", dq2sample + + # sample + if (len(sampleName)==0): + dq2Split = dq2sample.split("/") + sampleName = dq2Split[len(dq2Split)-2] + if (len(sampleName)==0): + sampleName = "default" + sampleDir = "samples/" + os.system("mkdir -p "+sampleDir) + sampleFile = sampleDir+sampleName+".def" + + # number of collections per file + noCollPerFile = -1 + if (len(sys.argv) >= idx+3): + noCollPerFile = int(sys.argv[idx+2]) + + # dq2destiny - with restrictions to avoid cross-domain data transfers + dq2destiny = "CERN-PROD_MCDISK" + if (len(sys.argv) >= idx+4): + dq2destiny = sys.argv[idx+3] + + hostDomain = string.join(socket.gethostbyaddr(socket.gethostname())[0].split('.')[-2:], '.') + toaDomain = TiersOfATLAS.getSiteProperty(dq2destiny, 'domain') + + if not toaDomain: + print "ERROR: DQ2 site is unknown: %s" % dq2destiny + sys.exit(2) + + ## allowing unige.ch to access CERN-PROD + if dq2destiny.startswith('CERN-PROD'): + if hostDomain == 'unige.ch': + pass + + ## check the match of client and dq2destiny domain + if hostDomain not in toaDomain: + print "ERROR: client domain (%s) not close to dataset location (%s)" % (hostDomain, dq2destiny) + sys.exit(2) + + # prefix + infileprefix = "gridcopy://" + if (len(sys.argv) >= idx+5): + infileprefix = sys.argv[idx+4] + + # no of collections + fileList,searchcmd = ListFromDq2(infileprefix,dq2destiny,dq2sample) + print ">> collections found: ", len(fileList) + + # write output to file + if (len(fileList)>0): + sampleFiles = writeSampleFile(sampleName,sampleFile,fileList,searchcmd,noCollPerFile) + print ">> max. 
collections/file: ", noCollPerFile, " (-1 = inf.)" + print ">> dq2 destination: ", dq2destiny + print ">> filename prefix: ", infileprefix + print ">> sample output file(s): \n", sampleFiles + print "\n NOTE: To prevent unnecessary network traffic, you are kindly asked to use\n the FileStager only for running over nearby grid collections.\n" + diff --git a/Database/FileStager/scripts/define_local_sample b/Database/FileStager/scripts/define_local_sample new file mode 100755 index 0000000000000000000000000000000000000000..484b9c6d3c39f4f64e676552938d7be5b571459c --- /dev/null +++ b/Database/FileStager/scripts/define_local_sample @@ -0,0 +1,114 @@ +#!/usr/bin/env python +# +# usage: define_local_sample [-n <sampleName>] <localDirectory> [<numberCollectionsPerFile>] + +import sys +import os +import string, commands +from datetime import datetime + + +def ListFromLocal(prefix,localDir): + searchcmd = "dir "+localDir + rootfiles = [] + for file in os.listdir(localDir): + file = localDir + file + if file.find('.root')>0: rootfiles.append(file) + return rootfiles,searchcmd + + +def writeSampleFile(sampleName,filename,filelist,searchcmd="",noCollPerFile=-1,printFlags=True): + + fileNames = [] + timenow = datetime.today().strftime("%d%b%y.%Hh%M") + + # multiple files + if (noCollPerFile>0): + fidx = 0 + idx = 0 + while(idx<len(filelist)): + if (idx % noCollPerFile == 0): + if (fidx!=0): file.close() + filenameI = filename.replace(".def","_"+str(fidx)+".def") + file = open(filenameI,'w') + file.write("# created "+timenow+"\n") + file.write("# searchcmd: "+searchcmd+"\n") + file.write("TITLE: "+sampleName+"_"+str(fidx)+"\n") + file.write("FLAGS: GridCopy=0"+"\n") + fileNames.append(filenameI) + fidx += 1 + file.write(filelist[idx] + "\n") + idx += 1 + file.close() + # one file + else: + file = open(filename,'w') + fileNames.append(filename) + if (printFlags): + file.write("# created "+timenow+"\n") + file.write("# searchcmd: "+searchcmd+"\n") + file.write("TITLE: "+sampleName+"\n") + file.write("FLAGS: GridCopy=1"+"\n") + for line in filelist: + file.write(line + "\n") + file.close() + + return fileNames + + +## main +if __name__ == "__main__": + + if (len(sys.argv)<2): + print "usage: ", sys.argv[0], "[-n <sampleName>] <localDirectory> [<numberCollectionsPerFile>]" + sys.exit(1) + + sampleName = "" + idx = 0 + + if (sys.argv[1] == "-n"): + if (len(sys.argv)<4): + print "usage: ", sys.argv[0], "[-n <sampleName>] <localDirectory> [<numberCollectionsPerFile>]" + sys.exit(1) + else: + sampleName = sys.argv[2] + idx = 2 + + print "Running: ", sys.argv[0] + + # local directory + localDir = sys.argv[idx+1] + while ( len(localDir)>=1 and (localDir.rfind("/")==(len(localDir)-1)) ): + localDir = localDir.rstrip("/") + localDir+="/" + print ">> local directory: ", localDir + + # sample + if (len(sampleName)==0): + localSplit = localDir.split("/") + sampleName = localSplit[len(localSplit)-2] + if (len(sampleName)==0): + sampleName = "default" + sampleDir = "samples/" + os.system("mkdir -p "+sampleDir) + sampleFile = sampleDir+sampleName+".def" + + # number of collections per file + noCollPerFile = -1 + if (len(sys.argv) >= idx+3): + noCollPerFile = int(sys.argv[idx+2]) + + # prefix, not used here + infileprefix = "" + + # no of collections + (fileList,searchcmd) = ListFromLocal(infileprefix,localDir) + print ">> collections found: ", len(fileList) + + # write output to file + if (len(fileList)>0): + sampleFiles = writeSampleFile(sampleName,sampleFile,fileList,searchcmd,noCollPerFile) + print ">> max. 
collections/file: ", noCollPerFile, " (-1 = inf.)" + print ">> filename prefix: ", infileprefix + print ">> sample output file(s): \n", sampleFiles + diff --git a/Database/FileStager/scripts/define_rfio_sample b/Database/FileStager/scripts/define_rfio_sample new file mode 100755 index 0000000000000000000000000000000000000000..a017afc78b954c8eb69c99f8105199ee71c3934e --- /dev/null +++ b/Database/FileStager/scripts/define_rfio_sample @@ -0,0 +1,124 @@ +#!/usr/bin/env python +# +# usage: define_rfio_sample [-n <sampleName>] <castorDirectory> [<numberCollectionsPerFile>] [<gridcopyPrefix>] +# + +import sys +import os +import string, commands +from datetime import datetime + + +def ListFromCastor(prefix,castorDir): + searchcmd = "nsls -l "+castorDir + os.system(searchcmd+ " > /tmp/filelist.temp") + FileList=open("/tmp/filelist.temp","r").readlines() + FileList1=[prefix+castorDir+file.split()[8] + for file in FileList + if file.split()[4]!="0"] + os.system("rm /tmp/filelist.temp") + return FileList1,searchcmd + + +def writeSampleFile(sampleName,filename,filelist,searchcmd="",noCollPerFile=-1,printFlags=True): + + fileNames = [] + timenow = datetime.today().strftime("%d%b%y.%Hh%M") + + # multiple files + if (noCollPerFile>0): + fidx = 0 + idx = 0 + while(idx<len(filelist)): + if (idx % noCollPerFile == 0): + if (fidx!=0): file.close() + filenameI = filename.replace(".def","_"+str(fidx)+".def") + file = open(filenameI,'w') + file.write("# created "+timenow+"\n") + file.write("# searchcmd: "+searchcmd+"\n") + file.write("TITLE: "+sampleName+"_"+str(fidx)+"\n") + file.write("FLAGS: GridCopy=1"+"\n") + fileNames.append(filenameI) + fidx += 1 + file.write(filelist[idx] + "\n") + idx += 1 + file.close() + # one file + else: + file = open(filename,'w') + fileNames.append(filename) + if (printFlags): + file.write("# created "+timenow+"\n") + file.write("# searchcmd: "+searchcmd+"\n") + file.write("TITLE: "+sampleName+"\n") + file.write("FLAGS: GridCopy=1"+"\n") + for line in filelist: + file.write(line + "\n") + file.close() + + return fileNames + + +## main +if __name__ == "__main__": + + (retcode,output) = commands.getstatusoutput("which rfdir") + if (retcode!=0): + print sys.argv[0], "ERROR: rfdir not found, cannot search for files." 
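+        # The .def files written further down are read back by
+        # interpret_sampleFile and FileStagerTool.ProcessSampleFile(), which
+        # skip the '#', TITLE and FLAGS header lines and treat every other
+        # line as one collection to stage.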
+ sys.exit(1) + + if (len(sys.argv)<2): + print "usage: ", sys.argv[0], "[-n <sampleName>] <castorDirectory> [<numberCollectionsPerFile>] [<gridcopyPrefix>]" + sys.exit(1) + + sampleName = "" + idx = 0 + + if (sys.argv[1] == "-n"): + if (len(sys.argv)<4): + print "usage: ", sys.argv[0], "[-n <sampleName>] <castorDirectory> [<numberCollectionsPerFile>] [<gridcopyPrefix>]" + sys.exit(1) + else: + sampleName = sys.argv[2] + idx = 2 + + print "Running: ", sys.argv[0] + + # castor directory + castorDir = sys.argv[idx+1] + while ( len(castorDir)>=1 and (castorDir.rfind("/")==(len(castorDir)-1)) ): + castorDir = castorDir.rstrip("/") + castorDir+="/" + print ">> castor directory: ", castorDir + + # sample + if (len(sampleName)==0): + castorSplit = castorDir.split("/") + sampleName = castorSplit[len(castorSplit)-2] + if (len(sampleName)==0): + sampleName = "default" + sampleDir = "samples/" + os.system("mkdir -p "+sampleDir) + sampleFile = sampleDir+sampleName+"rfio.def" + + # number of collections per file + noCollPerFile = -1 + if (len(sys.argv) >= idx+3): + noCollPerFile = int(sys.argv[idx+2]) + + # prefix + infileprefix = "gridcopy://" + if (len(sys.argv) >= idx+4): + infileprefix = sys.argv[idx+3] + + # no of collections + (fileList,searchcmd) = ListFromCastor(infileprefix,castorDir) + print ">> collections found: ", len(fileList) + + # write output to file + if (len(fileList)>0): + sampleFiles = writeSampleFile(sampleName,sampleFile,fileList,searchcmd,noCollPerFile) + print ">> max. collections/file: ", noCollPerFile, " (-1 = inf.)" + print ">> filename prefix: ", infileprefix + print ">> sample output file(s): \n", sampleFiles + diff --git a/Database/FileStager/scripts/define_xrootd_sample b/Database/FileStager/scripts/define_xrootd_sample new file mode 100755 index 0000000000000000000000000000000000000000..0587213a83e6c4ce68fd929054fb3f32e13c2ca4 --- /dev/null +++ b/Database/FileStager/scripts/define_xrootd_sample @@ -0,0 +1,124 @@ +#!/usr/bin/env python +# +# usage: define_xrootd_sample [-n <sampleName>] <castorDirectory> [<numberCollectionsPerFile>] [<gridcopyPrefix>] +# + +import sys +import os +import string, commands +from datetime import datetime + + +def ListFromCastor(prefix,castorDir): + searchcmd = "nsls -l "+castorDir + os.system(searchcmd+ " > /tmp/filelist.temp") + FileList=open("/tmp/filelist.temp","r").readlines() + FileList1=[prefix+castorDir+file.split()[8] + for file in FileList + if file.split()[4]!="0"] + os.system("rm /tmp/filelist.temp") + return FileList1,searchcmd + + +def writeSampleFile(sampleName,filename,filelist,searchcmd="",noCollPerFile=-1,printFlags=True): + + fileNames = [] + timenow = datetime.today().strftime("%d%b%y.%Hh%M") + + # multiple files + if (noCollPerFile>0): + fidx = 0 + idx = 0 + while(idx<len(filelist)): + if (idx % noCollPerFile == 0): + if (fidx!=0): file.close() + filenameI = filename.replace(".def","_"+str(fidx)+".def") + file = open(filenameI,'w') + file.write("# created "+timenow+"\n") + file.write("# searchcmd: "+searchcmd+"\n") + file.write("TITLE: "+sampleName+"_"+str(fidx)+"\n") + file.write("FLAGS: GridCopy=0"+"\n") + fileNames.append(filenameI) + fidx += 1 + file.write(filelist[idx] + "\n") + idx += 1 + file.close() + # one file + else: + file = open(filename,'w') + fileNames.append(filename) + if (printFlags): + file.write("# created "+timenow+"\n") + file.write("# searchcmd: "+searchcmd+"\n") + file.write("TITLE: "+sampleName+"\n") + file.write("FLAGS: GridCopy=1"+"\n") + for line in filelist: + file.write(line + "\n") + 
file.close() + + return fileNames + + +## main +if __name__ == "__main__": + + (retcode,output) = commands.getstatusoutput("which rfdir") + if (retcode!=0): + print sys.argv[0], "ERROR: rfdir not found, cannot search for files." + sys.exit(1) + + if (len(sys.argv)<2): + print "usage: ", sys.argv[0], "[-n <sampleName>] <castorDirectory> [<numberCollectionsPerFile>] [<xrootdPrefix>]" + sys.exit(1) + + sampleName = "" + idx = 0 + + if (sys.argv[1] == "-n"): + if (len(sys.argv)<4): + print "usage: ", sys.argv[0], "[-n <sampleName>] <castorDirectory> [<numberCollectionsPerFile>] [<xrootdPrefix>]" + sys.exit(1) + else: + sampleName = sys.argv[2] + idx = 2 + + print "Running: ", sys.argv[0] + + # castor directory + castorDir = sys.argv[idx+1] + while ( len(castorDir)>=1 and (castorDir.rfind("/")==(len(castorDir)-1)) ): + castorDir = castorDir.rstrip("/") + castorDir+="/" + print ">> castor directory: ", castorDir + + # sample + if (len(sampleName)==0): + castorSplit = castorDir.split("/") + sampleName = castorSplit[len(castorSplit)-2] + if (len(sampleName)==0): + sampleName = "default" + sampleDir = "samples/" + os.system("mkdir -p "+sampleDir) + sampleFile = sampleDir+sampleName+"_xrootd.def" + + # number of collections per file + noCollPerFile = -1 + if (len(sys.argv) >= idx+3): + noCollPerFile = int(sys.argv[idx+2]) + + # prefix + infileprefix = "root://castoratlast3/" + if (len(sys.argv) >= idx+4): + infileprefix = sys.argv[idx+3] + + # no of collections + (fileList,searchcmd) = ListFromCastor(infileprefix,castorDir) + print ">> collections found: ", len(fileList) + + # write output to file + if (len(fileList)>0): + sampleFiles = writeSampleFile(sampleName,sampleFile,fileList,searchcmd,noCollPerFile) + print ">> max. collections/file: ", noCollPerFile, " (-1 = inf.)" + print ">> filename prefix: ", infileprefix + print ">> sample output file(s): \n", sampleFiles + diff --git a/Database/FileStager/scripts/interpret_sampleFile b/Database/FileStager/scripts/interpret_sampleFile new file mode 100755 index 0000000000000000000000000000000000000000..367ac3e05c8696aa8c3a3a76e0922184eec3fe41 --- /dev/null +++ b/Database/FileStager/scripts/interpret_sampleFile @@ -0,0 +1,32 @@ +#!/usr/bin/env python +# +# usage: interpret_sampleFile <samplefile> [<outputfile.py>] + +import sys +import os +import string, commands +from datetime import datetime +from FileStager.FileStagerTool import FileStagerTool + +## main +if __name__ == "__main__": + + usageline = "usage: " + sys.argv[0] + " <samplefile> [<outputfile.py>]" + if (len(sys.argv)<2): + print usageline + sys.exit(1) + + if not os.access(sys.argv[1],os.F_OK): + print "ERROR: sample definition file <%s> not found. Exit." % sys.argv[1] + print usageline + sys.exit(1) + + sampleFile = sys.argv[1] + outputFile = "" + if (len(sys.argv)>=3): outputFile = sys.argv[2] + + print "# Run: %s %s %s" % (sys.argv[0],sampleFile,outputFile) + + stagetool = FileStagerTool(sampleFile=sampleFile) + stagetool.PrintSampleList(outputFile) + diff --git a/Database/FileStager/scripts/wrapper_fs-copy b/Database/FileStager/scripts/wrapper_fs-copy new file mode 100755 index 0000000000000000000000000000000000000000..7f54b2b3ad38c5513f6719f6079667f943364920 --- /dev/null +++ b/Database/FileStager/scripts/wrapper_fs-copy @@ -0,0 +1,367 @@ +#!/usr/bin/env python +#----------------------------------------------------- +# fs-copy.py is an wrapper script around the file copy +# command to provide more advanced features: +# +# 1. failure recovery by retring the copy command +# 2. 
quality check on the downloaded files by comparing the checksum +#----------------------------------------------------- + +import os +import os.path +#import shutil +#import tempfile + +import hashlib +#import md5 +import zlib +import sys +#import popen2 +import time +#import traceback +import pickle +import re +import getopt +import sys + +#subprocess.py crashes if python 2.5 is used +#try to import subprocess from local python installation before an +#import from PYTHON_DIR is attempted some time later +try: + import subprocess +except ImportError: + pass + +## Utility functions ## +def get_md5sum(fname): + ''' Calculates the MD5 checksum of a file ''' + + f = open(fname, 'rb') + m = md5.new() + while True: + d = f.read(8096) + if not d: + break + m.update(d) + f.close() + return m.hexdigest() + +def get_adler32sum(fname): + ''' Calculate the Adler32 checksum of a file ''' + + f = open(fname,'rb') + data = f.read() + cksum = hex( zlib.adler32(data) & 0xffffffff ) + f.close() + + # remove the tailing 'L' charactor + cksum = re.sub(r'L$','',cksum) + + return cksum + +def urisplit(uri): + ''' + Basic URI Parser according to STD66 aka RFC3986 + + >>> urisplit("scheme://authority/path?query#fragment") + ('scheme', 'authority', 'path', 'query', 'fragment') + + ''' + # regex straight from STD 66 section B + regex = '^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?' + p = re.match(regex, uri).groups() + scheme, authority, path, query, fragment = p[1], p[3], p[4], p[6], p[8] + #if not path: path = None + return (scheme, authority, path, query, fragment) + +def timeString(): + return time.strftime('%a %b %d %H:%M:%S %Y',time.gmtime(time.time())) + +def printInfo(s, outfile): + outfile.write(timeString() + ' [Info]' + ' ' + str(s) + os.linesep) + outfile.flush() + +def printError(s, outfile): + outfile.write(timeString() + ' [Error]' + ' ' + str(s) + os.linesep) + outfile.flush() + +## system command executor with subprocess +def execSyscmdSubprocess(cmd, wdir=os.getcwd()): + + import os + import subprocess + + exitcode = -999 + + mystdout = '' + mystderr = '' + + try: + + ## resetting essential env. 
variables + my_env = os.environ + +## my_env['LD_LIBRARY_PATH'] = '' +## my_env['PATH'] = '' +## my_env['PYTHONPATH'] = '' + +## if my_env.has_key('LD_LIBRARY_PATH_ORIG'): +## my_env['LD_LIBRARY_PATH'] = my_env['LD_LIBRARY_PATH_ORIG'] + +## if my_env.has_key('PATH_ORIG'): +## my_env['PATH'] = my_env['PATH_ORIG'] + +## if my_env.has_key('PYTHONPATH_ORIG'): +## my_env['PYTHONPATH'] = my_env['PYTHONPATH_ORIG'] + + child = subprocess.Popen(cmd, cwd=wdir, env=my_env, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + + (mystdout, mystderr) = child.communicate() + + exitcode = child.returncode + + finally: + pass + + return (exitcode, mystdout, mystderr) + +## resolving TURL from a given SURL +def resolveTURL(surl, protocol, outfile, errfile, timeout=300): + + ## default lcg-gt timeout: 5 minutes + cmd = 'lcg-gt -v -t %d %s %s' % (timeout, surl, protocol) + + exitcode = -999 + + mystdout = '' + mystderr = '' + + turl = '' + + try: + # use subprocess to run the user's application if the module is available on the worker node + import subprocess + printInfo('Run lcg-gt cmd: "%s"' % cmd, outfile) + (exitcode, mystdout, mystderr) = execSyscmdSubprocess(cmd) + + # print out command outputs if the return code is not 0 + if exitcode != 0: + printInfo(mystdout, outfile) + printError(mystderr, errfile) + + except ImportError,err: + # otherwise, use separate threads to control process IO pipes + printError('Not able to load subprocess module', errfile) + + if exitcode == 0: + data = mystdout.strip().split('\n') + + try: + if re.match(protocol, data[0]): + turl = data[0] + except IndexError: + pass + + return turl + + +## copy subroutine definition +def make_copycmd(protocol, vo, timeout, src_surl, dest_surl): + ''' + routine for composing copy command according to the requested protocol. 
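+    'lcgcp', 'xrootd' and 'rfcp' map directly onto lcg-cp, xrdcp and rfcp;
+    any other supported protocol first resolves a TURL from the SURL with
+    lcg-gt and then copies with dccp (gsidcap/dcap), rfcp (rfio) or
+    globus-url-copy (gsiftp). An empty string is returned when no command
+    could be composed, in which case the caller simply retries.
+
+    For example, make_copycmd('lcgcp', 'atlas', 1200, src, dest) yields
+    "lcg-cp -v --vo atlas -t 1200 <src> <dest>".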
+ ''' + + cmd = '' + + if protocol in [ 'lcgcp' ]: + cmd = 'lcg-cp -v --vo %s -t %d %s %s' % (vo, timeout, src_surl, dest_surl) + + elif protocol in [ 'xrootd' ]: + cmd = 'xrdcp %s %s' % (src_surl, dest_surl) #(src_turl, dest_fpath) + + elif protocol in [ 'rfcp' ]: + cmd = 'rfcp %s %s' % (src_surl, dest_surl) #(src_turl, dest_fpath) + + else: + + ## resolve TURL + src_turl = resolveTURL(src_surl, protocol, outfile, errfile) + + if src_turl: + + dest_fpath = '/' + re.sub(r'^(file:)?\/*','',dest_surl) + + if protocol in [ 'gsidcap', 'dcap' ]: + cmd = 'dccp -A -d 2 -o %d %s %s' % (timeout, src_turl, dest_fpath) + + elif protocol in [ 'rfio' ]: + cmd = 'rfcp %s %s' % (src_turl, dest_fpath) + + elif protocol in [ 'gsiftp' ]: + ## keep retrying the failed transfer operation within the given timeout + ## wait for 30 seconds to the next retry + cmd = 'globus-url-copy -rst-interval 30 -rst-timeout %d %s %s' % (timeout, src_turl, dest_surl) + + else: + pass + + return cmd + + + +# Main program +if __name__ == '__main__': + + ## example: + ## wrapper_fs-copy -s -p xrootd root://castoratlas//castor/cern.ch/grid/atlas/atlt3/susy/susy11/p543_spyroot/step1/muSUSY11b/Muons00180149_2.split.ntuple.root /tmp/mbaak/bar.root + + ## default value specification + supported_protocols = ['lcgcp','rfio','gsidcap','dcap','gsiftp','xrootd','rfcp'] + + protocol = 'lcgcp' + timeout = 1200 + vo = 'atlas' + max_trial = 3 + stdouterr = False + + src_surl = None + dest_surl = None + + ## a workaround to avoid passing protocol as a argument to this script + ## this should be disabled when the bug in Athena/FileStager is fixed + if os.environ.has_key('FILE_STAGER_PROTOCOL'): + if os.environ['FILE_STAGER_PROTOCOL'] in supported_protocols: + protocol = os.environ['FILE_STAGER_PROTOCOL'] + + ## parse command-line options/arguments + try: + opts, args = getopt.getopt(sys.argv[1:], 'p:t:s', ["vo=", "mt="]) + + for o,a in opts: + if o in [ '--vo' ]: + vo = a + elif o in [ '--mt' ]: + max_trial = int(a) + elif o in [ '-p' ]: + if a in supported_protocols: + protocol = a + print 'file copy protocol: %s' % a + else: + print 'protocal not supported: %s, trying %s' % (a, protocol) + elif o in [ '-t' ]: + timeout = int(a) + elif o in [ '-s' ]: + stdouterr = True + + if len(args) == 2: + src_surl = args[0] + dest_surl = args[1] + else: + raise getopt.GetoptError('missing source or destination SURL in command arguments.') + + except getopt.GetoptError, err: + ## close stdout/err and exit the program + print err + sys.exit(2) + + ## open files for command output and error + if stdouterr: + outfile = sys.stdout + errfile = sys.stderr + else: + outfile = open('FileStager.out','a') + errfile = open('FileStager.err','a') + + + ## load the checksum pickle if it exists + csumfile = 'lfc_checksum.pickle' + csum = None + if os.path.exists(csumfile): + f = open(csumfile,'r') + csum = pickle.load(f) + f.close() + + ## initialize trial count and flags + cnt_trial = 0 + isDone = False + + ## initialize the return code + rc = 0 + + ## main copy loop + while not isDone and ( cnt_trial < max_trial ): + + exitcode = -999 + + cnt_trial += 1 + + ## compose copy command + ## - timeout is increasd for each trial + copy_cmd = make_copycmd(protocol, vo, timeout*cnt_trial, src_surl, dest_surl) + + ## faile to compose the full copy command, give another try for the whole loop + if not copy_cmd: + printError('fail to compose copy command', errfile) + continue + + try: + # use subprocess to run the user's application if the module is available on the worker node + 
import subprocess + printInfo('Run copy cmd: "%s"' % copy_cmd, outfile) + (exitcode, mystdout, mystderr) = execSyscmdSubprocess(copy_cmd) + + # print command detail if return code is not 0 + if exitcode != 0: + printInfo(mystdout, outfile) + printError(mystderr, errfile) + + except ImportError,err: + # otherwise, use separate threads to control process IO pipes + printError('Not able to load subprocess module', errfile) + break + + printInfo( 'copy command exitcode: %s' % repr(exitcode), outfile ) + + if exitcode == 0: + ## try to get the checksum type/value stored in LFC + ## - the checksum dictionary is produced by 'make_filestager_joption.py' + ## and stored in a pickle file. + if csum and csum.has_key(src_surl): + + csum_type = csum[src_surl]['csumtype'] + csum_value = csum[src_surl]['csumvalue'] + + if csum_type and csum_value: + + ## do checksum comparison on the downloaded file + dest_file = urisplit(dest_surl)[2] + + csum_local = '' + + if csum_type.upper() == 'MD': + csum_local = get_md5sum(dest_file) + elif csum_type.upper() == 'AD': + # slight modification on the hex string to make it compatible with what stored in LFC + csum_local = get_adler32sum(dest_file).replace('0x','').zfill(8) + else: + pass + + if csum_local.lower() == csum_value.lower(): + printInfo( '%s checksum matched: %s' % (csum_type, csum_value), outfile ) + isDone = True + else: + printInfo( '%s checksum mismatch: %s ( local:%s != lfc:%s )' % (csum_type, src_surl, csum_local, csum_value), outfile ) + isDone = False + else: + printInfo( 'Ignore checksum comparison: %s' % src_surl, outfile) + isDone = True + + if not stdouterr: + outfile.close() + errfile.close() + + # wrapper script return code + if not isDone: + rc = 1 + + sys.exit(rc) diff --git a/Database/FileStager/scripts/wrapper_lcg-cp b/Database/FileStager/scripts/wrapper_lcg-cp new file mode 100755 index 0000000000000000000000000000000000000000..4254e4197dd3c944ab7f69c0e3e39c2b914cb4da --- /dev/null +++ b/Database/FileStager/scripts/wrapper_lcg-cp @@ -0,0 +1,264 @@ +#!/usr/bin/env python +#----------------------------------------------------- +# Author: Hurng-Chun Lee <Hurng-Chun.Lee@cern.ch> +# Last update: 6 Feb 2009 +# +# fs-copy.py is an wrapper script around the file copy +# command to provide more advanced features: +# +# 1. failure recovery by retring the copy command +# 2. 
quality check on the downloaded files by comparing the checksum +#----------------------------------------------------- + +import os +import os.path +#import shutil +#import tempfile +import md5 +import zlib +import sys +#import popen2 +import time +#import traceback +import pickle +import re +import getopt + +#subprocess.py crashes if python 2.5 is used +#try to import subprocess from local python installation before an +#import from PYTHON_DIR is attempted some time later +try: + import subprocess +except ImportError: + pass + +## Utility functions ## +def get_md5sum(fname): + ''' Calculates the MD5 checksum of a file ''' + + f = open(fname, 'rb') + m = md5.new() + while True: + d = f.read(8096) + if not d: + break + m.update(d) + f.close() + return m.hexdigest() + +def get_adler32sum(fname): + ''' Calculate the Adler32 checksum of a file ''' + + f = open(fname,'rb') + data = f.read() + cksum = hex( zlib.adler32(data) & 0xffffffff ) + f.close() + + # remove the tailing 'L' charactor + cksum = re.sub(r'L$','',cksum) + + return cksum + +def urisplit(uri): + """ + Basic URI Parser according to STD66 aka RFC3986 + + >>> urisplit("scheme://authority/path?query#fragment") + ('scheme', 'authority', 'path', 'query', 'fragment') + + """ + # regex straight from STD 66 section B + regex = '^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?' + p = re.match(regex, uri).groups() + scheme, authority, path, query, fragment = p[1], p[3], p[4], p[6], p[8] + #if not path: path = None + return (scheme, authority, path, query, fragment) + +def timeString(): + return time.strftime('%a %b %d %H:%M:%S %Y',time.gmtime(time.time())) + +def printInfo(s, outfile): + outfile.write(timeString() + ' [Info]' + ' ' + str(s) + os.linesep) + outfile.flush() + +def printError(s, outfile): + outfile.write(timeString() + ' [Error]' + ' ' + str(s) + os.linesep) + outfile.flush() + +## system command executor with subprocess +def execSyscmdSubprocess(cmd, outfile, errfile, wdir=os.getcwd()): + + import os, subprocess + + exitcode = None + + try: + child = subprocess.Popen(cmd, cwd=wdir, shell=True, stdout=outfile, stderr=errfile) + + while 1: + exitcode = child.poll() + if exitcode is not None: + break + else: + outfile.flush() + errfile.flush() + time.sleep(0.3) + finally: + pass + + outfile.flush() + errfile.flush() + + printInfo('subprocess exit code: %d' % status, outfile) + + if exitcode != 0: + return False + else: + return True + +# Main program +if __name__ == '__main__': + + ## open files for command output and error + outfile = open('FileStager.out','a') + errfile = open('FileStager.err','a') + + ## default value specification + supported_protocols = ['lcgcp'] + + protocol = 'lcgcp' + timeout = 1200 + vo = 'atlas' + max_trial = 5 + + src_surl = None + dest_surl = None + + ## internal subroutine definition + def make_copycmd(protocol, vo, timeout, src_surl, dest_surl): + ''' + routine for composing copy command according to the requested protocol. 
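+        Illustrative usage, assuming the defaults set above (vo='atlas', timeout=1200):
+
+            make_copycmd('lcgcp', 'atlas', 1200, src_surl, dest_surl)
+            # returns 'lcg-cp -v --vo atlas -t 1200 <src_surl> <dest_surl>'
+
+        Any protocol other than 'lcgcp' yields an empty command string here.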
+ ''' + + cmd = '' + + if protocol in [ 'lcgcp' ]: + cmd = 'lcg-cp -v --vo %s -t %d %s %s' % (vo, timeout, src_surl, dest_surl) + else: + pass + + return cmd + + ## parse command-line options/arguments + try: + opts, args = getopt.getopt(sys.argv[1:], 'p:t:', ["vo=", "mt="]) + + for o,a in opts: + if o in [ '--vo' ]: + vo = a + elif o in [ '--mt' ]: + max_trial = int(a) + elif o in [ '-p' ]: + if a in supported_protocols: + protocol = a + else: + printInfo('protocal not supported: %s, trying %s' % (a, protocol), outfile) + elif o in [ '-t' ]: + timeout = int(a) + + if len(args) == 2: + src_surl = args[0] + dest_surl = args[1] + else: + raise getopt.GetoptError('missing source or destination SURL in command arguments.') + + except getopt.GetoptError, err: + ## close stdout/err and exit the program + printError(str(err), errfile) + outfile.close() + errfile.close() + sys.exit(2) + + ## load the checksum pickle if it exists + csumfile = 'lfc_checksum.pickle' + csum = None + if os.path.exists(csumfile): + f = open(csumfile,'r') + csum = pickle.load(f) + f.close() + + ## initialize trial count and flags + cnt_trial = 0 + isDone = False + + ## initialize the return code + rc = 0 + + ## main copy loop + while not isDone and ( cnt_trial < max_trial ): + + status = False + + cnt_trial += 1 + + ## compose copy command + ## - timeout is increasd for each trial + copy_cmd = make_copycmd(protocol, vo, timeout*cnt_trial, src_surl, dest_surl) + + try: + # use subprocess to run the user's application if the module is available on the worker node + import subprocess + printInfo('Run copy cmd: "%s"' % copy_cmd, outfile) + status = execSyscmdSubprocess(copy_cmd, outfile, errfile) + except ImportError,err: + # otherwise, use separate threads to control process IO pipes + printError('Not able to load subprocess module', errfile) + break + + printInfo( 'copy command status: %s' % repr(status), outfile ) + + if status: + ## try to get the checksum type/value stored in LFC + ## - the checksum dictionary is produced by 'make_filestager_joption.py' + ## and stored in a pickle file. 
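+            ## Illustrative shape assumed for the lfc_checksum.pickle content, inferred
+            ## from the lookups below (the real dictionary is written by
+            ## make_filestager_joption.py):
+            ##   csum = { '<src_surl>': { 'csumtype': 'AD', 'csumvalue': '0a1b2c3d' } }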
+ if csum and csum.has_key(src_surl): + + csum_type = csum[src_surl]['csumtype'] + csum_value = csum[src_surl]['csumvalue'] + + printInfo( 'csum_type:%s csum_value:%s' % (csum_type, csum_value), outfile ) + + if csum_type and csum_value: + + ## do checksum comparison on the downloaded file + dest_file = urisplit(dest_surl)[2] + + csum_local = '' + + if csum_type.upper() == 'MD': + csum_local = get_md5sum(dest_file) + elif csum_type.upper() == 'AD': + # slight modification on the hex string to make it compatible with what stored in LFC + csum_local = get_adler32sum(dest_file).replace('0x','').zfill(8) + else: + pass + + if csum_local.lower() == csum_value.lower(): + printInfo( '%s checksum matched: %s ( local:%s == lfc:%s )' % (csum_type, src_surl, csum_local, csum_value), outfile ) + isDone = True + else: + printInfo( '%s checksum not match: %s ( local:%s != lfc:%s )' % (csum_type, src_surl, csum_local, csum_value), outfile ) + isDone = False + else: + printInfo( 'Ignore checksum comparison: %s' % src_surl, outfile) + isDone = True + + outfile.close() + errfile.close() + + # wrapper script return code + if not isDone: + rc = 1 + + sys.exit(rc) + diff --git a/Database/FileStager/share/FileStager_exampleJobOptions.py b/Database/FileStager/share/FileStager_exampleJobOptions.py new file mode 100644 index 0000000000000000000000000000000000000000..d7aafb3af039d2f034bdd6d2609db33d17eb516d --- /dev/null +++ b/Database/FileStager/share/FileStager_exampleJobOptions.py @@ -0,0 +1,55 @@ +DetDescrVersion="ATLAS-GEO-02-01-00" ## Set your geometry here +#ConditionsTag="COMCOND-REPC-003-00" ## Set the conditions tag here;not needed for AOD + +##------------------------------------------------------------ +## These 4 lines are needed for actual data +##------------------------------------------------------------ +#from AthenaCommon.GlobalFlags import GlobalFlags +#GlobalFlags.DetGeo.set_atlas() +#GlobalFlags.DataSource.set_data() +#from IOVDbSvc.CondDB import conddb + +doTrigger = False ## Need to check +EvtMax=-1 ## number of event to process + +## the Input AOD File(s) +include ("FileStager/input_FileStagerRFCP.py") ## for rfcp +#include ("FileStager/input_FileStager.py") ## for lcg-cp + +## UserAlgs section +## include your algorithm job options here +UserAlgs=[ # 'package/yourJOwouldGoHere.py', + ] + +## Output log setting; this is for the framework in general +## You may over-ride this in your job options for your algorithm + +OutputLevel = INFO + +## Read settings; for performance DPD set ESD to true +readRDO = False +readESD = False +readAOD = True + +## Write settings; keep all of these to false. 
+## Control the writing of your own n-tuple in the alg's job options +doCBNT = False +doWriteESD = False +doWriteAOD = False +doWriteTAG = False + +## need these two flags to turn on lumiblockmetadatatool and output metadatastore +doDPD=True +doFileMetaData=True + +## Turn off perfmon +from RecExConfig.RecFlags import rec +rec.doPerfMon=False + +## Turn down verbosity of eventmanager +AthenaEventLoopMgr = Service( "AthenaEventLoopMgr" ) +AthenaEventLoopMgr.OutputLevel = WARNING + +## main jobOption - must always be included +include ("RecExCommon/RecExCommon_topOptions.py") + diff --git a/Database/FileStager/share/input_FileStager.py b/Database/FileStager/share/input_FileStager.py new file mode 100644 index 0000000000000000000000000000000000000000..4134f28398158bd1e5d8ee8f837018397edaabeb --- /dev/null +++ b/Database/FileStager/share/input_FileStager.py @@ -0,0 +1,92 @@ +if ('sampleList' in dir()) or ('sampleFile' in dir()): + ################################################################################################# + # Provide input for the FileStager here + ################################################################################################# + + ## import filestager tool + from FileStager.FileStagerTool import FileStagerTool + + if ('sampleList' in dir()): + stagetool = FileStagerTool(sampleList=sampleList) + elif ('sampleFile' in dir()): + print "FileStager() : Now processing sample file : %s" % sampleFile + stagetool = FileStagerTool(sampleFile=sampleFile) + + ## Configure copy command used by the stager; default is 'lcg-cp -v --vo altas -t 1200'. + stagetool.CpCommand = "wrapper_lcg-cp" + stagetool.CpArguments = [] + #stagetool.OutfilePrefix = "file:" + #stagetool.checkGridProxy = True + #stagetool.LogfileDir = "./" + + ################################################################################################# + # Configure the FileStager -- no need to change these lines + ################################################################################################# + + ## get Reference to existing Athena job + from AthenaCommon.AlgSequence import AlgSequence + thejob = AlgSequence() + + ## check if collection names begin with "gridcopy" + print "FileStager() : doStaging ?", stagetool.DoStaging() + + ## Import file stager algorithm + from FileStager.FileStagerConf import FileStagerAlg + + ## filestageralg needs to be the first algorithm added to the thejob. 
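+  ## Illustrative entry format assumed for the sample list, matching the "gridcopy"
+  ## prefix check above and FileStagerAlg's default InfilePrefix (host and path are
+  ## placeholders):
+  ##   gridcopy://srm://some.se.host/some/path/file.AOD.pool.root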
+ if stagetool.DoStaging(): + thejob += FileStagerAlg('FileStager') + thejob.FileStager.InputCollections = stagetool.GetSampleList() + #thejob.FileStager.PipeLength = 2 + #thejob.FileStager.VerboseStager = True + #thejob.FileStager.KeepLogfiles = True + thejob.FileStager.LogfileDir = stagetool.LogfileDir + thejob.FileStager.BaseTmpdir = stagetool.GetTmpdir() + thejob.FileStager.InfilePrefix = stagetool.InfilePrefix + thejob.FileStager.OutfilePrefix = stagetool.OutfilePrefix + thejob.FileStager.CpCommand = stagetool.CpCommand + #thejob.FileStager.CpArguments = stagetool.CpArguments + thejob.FileStager.FirstFileAlreadyStaged = stagetool.StageFirstFile + thejob.FileStager.StoreStatistics = False + + ################################################################################################# + # Pass collection names to EventSelector + ################################################################################################# + + ## set input collections + ic = [] + if stagetool.DoStaging(): + ic = stagetool.GetStageCollections() + else: + ic = stagetool.GetSampleList() + + ## assume we're dealing with AODs, else ESDs + poolESDInput = False + if len(ic)>0: + if ic[0].find('ESD')>0: poolESDInput = True + + ## default: EventSelector + try: + svcMgr = theApp.serviceMgr() + svcMgr.EventSelector.InputCollections = ic + #svcMgr.EventSelector.SkipBadFiles = True + except Exception,inst: + pass + + ## else: athenaCommonFlags + if not poolESDInput: + try: + ## the Input AOD File(s) + from AthenaCommon.AthenaCommonFlags import athenaCommonFlags + athenaCommonFlags.FilesInput = ic + except Exception,inst: + pass + else: + try: + ## the Input ESD File(s) + from AthenaCommon.AthenaCommonFlags import athenaCommonFlags + athenaCommonFlags.FilesInput = ic + except Exception,inst: + pass + + diff --git a/Database/FileStager/share/input_FileStagerRFCP.py b/Database/FileStager/share/input_FileStagerRFCP.py new file mode 100644 index 0000000000000000000000000000000000000000..0b69f37b4c2b30ce0af0421a9a58d2a53fc0ab9a --- /dev/null +++ b/Database/FileStager/share/input_FileStagerRFCP.py @@ -0,0 +1,92 @@ +if ('sampleList' in dir()) or ('sampleFile' in dir()): + ################################################################################################# + # Provide input for the FileStager here + ################################################################################################# + + ## import filestager tool + from FileStager.FileStagerTool import FileStagerTool + + if ('sampleList' in dir()): + stagetool = FileStagerTool(sampleList=sampleList) + elif ('sampleFile' in dir()): + print "FileStager() : Processing sample file : %s" % sampleFile + stagetool = FileStagerTool(sampleFile=sampleFile) + + ## Configure rf copy command used by the stager; default is 'lcg-cp -v --vo altas -t 1200' + stagetool.CpCommand = "rfcp" + stagetool.CpArguments = [] + stagetool.OutfilePrefix = "" + stagetool.checkGridProxy = False + #stagetool.LogfileDir = "./" + + ################################################################################################# + # Configure the FileStager -- no need to change these lines + ################################################################################################# + + ## get Reference to existing Athena job + from AthenaCommon.AlgSequence import AlgSequence + thejob = AlgSequence() + + ## check if collection names begin with "gridcopy" + print "FileStager() : doStaging ?", stagetool.DoStaging() + + ## Import file stager algorithm + from 
FileStager.FileStagerConf import FileStagerAlg + + ## filestageralg needs to be the first algorithm added to the thejob. + if stagetool.DoStaging(): + thejob += FileStagerAlg('FileStager') + thejob.FileStager.InputCollections = stagetool.GetSampleList() + #thejob.FileStager.PipeLength = 2 + #thejob.FileStager.VerboseStager = True + #thejob.FileStager.KeepLogfiles = True + thejob.FileStager.LogfileDir = stagetool.LogfileDir + thejob.FileStager.BaseTmpdir = stagetool.GetTmpdir() + thejob.FileStager.InfilePrefix = stagetool.InfilePrefix + thejob.FileStager.OutfilePrefix = stagetool.OutfilePrefix + thejob.FileStager.CpCommand = stagetool.CpCommand + #thejob.FileStager.CpArguments = stagetool.CpArguments + thejob.FileStager.FirstFileAlreadyStaged = stagetool.StageFirstFile + thejob.FileStager.StoreStatistics = False + + ################################################################################################# + # Pass collection names to EventSelector + ################################################################################################# + + ## set input collections + ic = [] + if stagetool.DoStaging(): + ic = stagetool.GetStageCollections() + else: + ic = stagetool.GetSampleList() + + ## assume we're dealing with AODs, else ESDs + poolESDInput = False + if len(ic)>0: + if ic[0].find('ESD')>0: poolESDInput = True + + ## default: EventSelector + try: + svcMgr = theApp.serviceMgr() + svcMgr.EventSelector.InputCollections = ic + #svcMgr.EventSelector.SkipBadFiles = True + except Exception,inst: + pass + + ## else athenaCommonFlags + if not poolESDInput: + try: + ## the Input AOD File(s) + from AthenaCommon.AthenaCommonFlags import athenaCommonFlags + athenaCommonFlags.PoolAODInput = ic + except Exception,inst: + pass + else: + try: + ## the Input ESD File(s) + from AthenaCommon.AthenaCommonFlags import athenaCommonFlags + athenaCommonFlags.PoolESDInput = ic + except Exception,inst: + pass + + diff --git a/Database/FileStager/share/input_FileStagerRecoTrf.py b/Database/FileStager/share/input_FileStagerRecoTrf.py new file mode 100644 index 0000000000000000000000000000000000000000..3657a9a165d6813d36fe2cb32d056bc837af442a --- /dev/null +++ b/Database/FileStager/share/input_FileStagerRecoTrf.py @@ -0,0 +1,77 @@ +if ('sampleList' in dir()) or ('sampleFile' in dir()): + ################################################################################################# + # Provide input for the FileStager here + ################################################################################################# + + ## import filestager tool + from FileStager.FileStagerTool import FileStagerTool + + if ('sampleList' in dir()): + stagetool = FileStagerTool(sampleList=sampleList) + elif ('sampleFile' in dir()): + print "FileStager() : Now processing sample file : %s" % sampleFile + stagetool = FileStagerTool(sampleFile=sampleFile) + + ## Configure copy command used by the stager; default is 'lcg-cp -v --vo altas -t 1200'. 
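+  ## Illustrative alternative based on the default quoted above: call lcg-cp directly
+  ## instead of the wrapper script (the argument list is a sketch):
+  ##   stagetool.CpCommand = "lcg-cp"
+  ##   stagetool.CpArguments = ["-v", "--vo", "atlas", "-t", "1200"]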
+ stagetool.CpCommand = "wrapper_lcg-cp" + stagetool.CpArguments = [] + #stagetool.OutfilePrefix = "file:" + #stagetool.checkGridProxy = True + #stagetool.LogfileDir = "./" + + ################################################################################################# + # Configure the FileStager -- no need to change these lines + ################################################################################################# + + ## get Reference to existing Athena job + from AthenaCommon.AlgSequence import AlgSequence + thejob = AlgSequence() + + ## check if collection names begin with "gridcopy" + print "FileStager() : doStaging ?", stagetool.DoStaging() + + ## Import file stager algorithm + from FileStager.FileStagerConf import FileStagerAlg + + ## filestageralg needs to be the first algorithm added to the thejob. + if stagetool.DoStaging(): + thejob += FileStagerAlg('FileStager') + thejob.FileStager.InputCollections = stagetool.GetSampleList() + #thejob.FileStager.PipeLength = 2 + #thejob.FileStager.VerboseStager = True + #thejob.FileStager.KeepLogfiles = True + thejob.FileStager.LogfileDir = stagetool.LogfileDir + thejob.FileStager.BaseTmpdir = stagetool.GetTmpdir() + thejob.FileStager.InfilePrefix = stagetool.InfilePrefix + thejob.FileStager.OutfilePrefix = stagetool.OutfilePrefix + thejob.FileStager.CpCommand = stagetool.CpCommand + #thejob.FileStager.CpArguments = stagetool.CpArguments + thejob.FileStager.FirstFileAlreadyStaged = stagetool.StageFirstFile + thejob.FileStager.StoreStatistics = False + + ################################################################################################# + # Pass collection names to EventSelector + ################################################################################################# + + ## set input collections + ic = [] + if stagetool.DoStaging(): + ic = stagetool.GetStageCollections() + else: + ic = stagetool.GetSampleList() + + ## assume we're dealing with AODs, else ESDs + poolESDInput = False + if len(ic)>0: + if ic[0].find('ESD')>0: poolESDInput = True + + # import run arguments + if not 'runArgs' in dir(): + from PyJobTransformsCore.runargs import RunArguments + runArgs = RunArguments() + + # Input file that contains ESD's + if poolESDInput: + runArgs.inputESDFile = ic + else: runArgs.inputFile = ic + diff --git a/Database/FileStager/share/input_FileStagerXRFCP.py b/Database/FileStager/share/input_FileStagerXRFCP.py new file mode 100644 index 0000000000000000000000000000000000000000..0ecc2e9c8e9c1c23bd4a8bd831c1d453f633cf5c --- /dev/null +++ b/Database/FileStager/share/input_FileStagerXRFCP.py @@ -0,0 +1,92 @@ + +if ('sampleList' in dir()) or ('sampleFile' in dir()): + ################################################################################################# + # Provide input for the FileStager here + ################################################################################################# + + ## import filestager tool + from FileStager.FileStagerTool import FileStagerTool + + if ('sampleList' in dir()): + stagetool = FileStagerTool(sampleList=sampleList) + elif ('sampleFile' in dir()): + print "FileStager() : Processing sample file : %s" % sampleFile + stagetool = FileStagerTool(sampleFile=sampleFile) + + ## Configure rf copy command used by the stager; default is 'lcg-cp -v --vo altas -t 1200' + stagetool.CpCommand = "/afs/cern.ch/user/p/peters/public/xrfcp/bin/xrfcp" + stagetool.CpArguments = ["-b","131072"] # to restrict file copy speed, add : -t 10 (10 Mb/s) + stagetool.OutfilePrefix = "" + 
stagetool.checkGridProxy = False + #stagetool.LogfileDir = "./" + + ################################################################################################# + # Configure the FileStager -- no need to change these lines + ################################################################################################# + + ## get Reference to existing Athena job + from AthenaCommon.AlgSequence import AlgSequence + thejob = AlgSequence() + + ## check if collection names begin with "gridcopy" + print "FileStager() : doStaging ?", stagetool.DoStaging() + + ## Import file stager algorithm + from FileStager.FileStagerConf import FileStagerAlg + + ## filestageralg needs to be the first algorithm added to the thejob. + if stagetool.DoStaging(): + thejob += FileStagerAlg('FileStager') + thejob.FileStager.InputCollections = stagetool.GetSampleList() + #thejob.FileStager.PipeLength = 2 + #thejob.FileStager.VerboseStager = True + #thejob.FileStager.KeepLogfiles = True + thejob.FileStager.LogfileDir = stagetool.LogfileDir + thejob.FileStager.BaseTmpdir = stagetool.GetTmpdir() + thejob.FileStager.InfilePrefix = stagetool.InfilePrefix + thejob.FileStager.OutfilePrefix = stagetool.OutfilePrefix + thejob.FileStager.CpCommand = stagetool.CpCommand + #thejob.FileStager.CpArguments = stagetool.CpArguments + thejob.FileStager.FirstFileAlreadyStaged = stagetool.StageFirstFile + thejob.FileStager.StoreStatistics = False + + ################################################################################################# + # Pass collection names to EventSelector + ################################################################################################# + + ## set input collections + ic = [] + if stagetool.DoStaging(): + ic = stagetool.GetStageCollections() + else: + ic = stagetool.GetSampleList() + + ## assume we're dealing with AODs, else ESDs + poolESDInput = False + if len(ic)>0: + if ic[0].find('ESD')>0: poolESDInput = True + + ## default: EventSelector + try: + svcMgr = theApp.serviceMgr() + svcMgr.EventSelector.InputCollections = ic + #svcMgr.EventSelector.SkipBadFiles = True + except Exception,inst: + pass + + ## else: athenaCommonFlags + if not poolESDInput: + try: + ## the Input AOD File(s) + from AthenaCommon.AthenaCommonFlags import athenaCommonFlags + athenaCommonFlags.PoolAODInput = ic + except Exception,inst: + pass + else: + try: + ## the Input ESD File(s) + from AthenaCommon.AthenaCommonFlags import athenaCommonFlags + athenaCommonFlags.PoolESDInput = ic + except Exception,inst: + pass + diff --git a/Database/FileStager/src/FileStagerAlg.cxx b/Database/FileStager/src/FileStagerAlg.cxx new file mode 100644 index 0000000000000000000000000000000000000000..ab9bc258d1ec56799b3f5d34d793e7183410fbcf --- /dev/null +++ b/Database/FileStager/src/FileStagerAlg.cxx @@ -0,0 +1,271 @@ +/* + Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration +*/ + +// STL includes +#include <iterator> + +#include "GaudiKernel/MsgStream.h" +#include "FileStager/FileStagerAlg.h" + +#include "GaudiKernel/FileIncident.h" +#include "GaudiKernel/ServiceHandle.h" +#include "GaudiKernel/IIncidentSvc.h" + +#include "FileStager/TStageManager.h" +#include "FileStager/TCopyFile.h" +#include <TFile.h> +#include <TTree.h> +#include <TStopwatch.h> +#include <TH1D.h> +#include <TFile.h> + +FileStagerAlg::FileStagerAlg(const std::string& name, ISvcLocator* pSvcLocator) + : Algorithm(name, pSvcLocator) + , m_pipeLength(1) + , m_verbose(false) + , m_verboseWait(true) + , m_firstFileAlreadyStaged(false) + , 
m_treeName("CollectionTree") + , m_infilePrefix("gridcopy://") + , m_outfilePrefix("file:") + , m_cpCommand("lcg-cp") + , m_baseTmpdir("") + , m_logfileDir("") + , m_keepLogfiles(false) + , m_storeStats(false) + , _numEventsInFile(0) + , _event(0) + , _prevFile("") + , _stopwatch(0) + , _waittime(0) + , _waithist(0) +{ + // Declare the properties + declareProperty("PipeLength", m_pipeLength); + declareProperty("VerboseStager", m_verbose); + declareProperty("VerboseWaiting", m_verboseWait); + declareProperty("FirstFileAlreadyStaged",m_firstFileAlreadyStaged); + declareProperty("TreeName",m_treeName); + declareProperty("InfilePrefix", m_infilePrefix); + declareProperty("OutfilePrefix", m_outfilePrefix); + declareProperty("CpCommand", m_cpCommand); + declareProperty("BaseTmpdir", m_baseTmpdir); + declareProperty("CpArguments", m_cpArg, "vector of cp arguments"); + declareProperty("InputCollections", m_inCollection, "vector of input files"); + declareProperty("OutputCollections", m_outCollection, "vector of output files"); + declareProperty("LogfileDir", m_logfileDir); + declareProperty("KeepLogfiles", m_keepLogfiles); + declareProperty("StoreStatistics", m_storeStats); + + _stopwatch = new TStopwatch(); + _waithist = new TH1D("stagehist","Time for next file to finish staging (s)",600,0.,60.); + + // file statistics about number of bytes processed by root + TCopyFile::PrintInfo(); +} + + +FileStagerAlg::~FileStagerAlg() +{ + if (_stopwatch) delete _stopwatch; + if (_waithist) delete _waithist; + + // file statistics about number of bytes processed by root + TCopyFile::PrintInfo(); + + // release last file + // ... assuming is has been closed by pool ;-) + + //TStageManager& manager(TStageManager::instance()); + //manager.releaseAll(); +} + + +StatusCode +FileStagerAlg::initialize() +{ + MsgStream log(msgSvc(), name()); + log << MSG::DEBUG << name() << "Initialize()" << endreq; + + // use the incident service to register + IIncidentSvc* incsvc = 0; + StatusCode status = service("IncidentSvc", incsvc, true); + + if(status.isFailure() || incsvc==0) { + log << MSG::WARNING << "Unable to get IncidentSvc! MF mechanism is disabled" << endreq; + return StatusCode::SUCCESS; + } + + incsvc->addListener(this, "BeginInputFile", 60); // pri has to be < 100 to be after MetaDataSvc. + incsvc->addListener(this, "EndInputFile", 0); + + log << MSG::DEBUG << "Added listeners on begin and end of input files." << endreq; + + // configure the stager + configStager(); + // fill up the stager + loadStager(); + + setupNextFile(); + + log << MSG::DEBUG << name() << "Initialize() successful" << endreq; + return StatusCode::SUCCESS; +} + + +StatusCode +FileStagerAlg::execute() +{ + MsgStream log(msgSvc(), name()); + log << MSG::DEBUG << name() << "Execute()" << endreq; + + log << MSG::DEBUG << name() << "Execute() successful" << endreq; + return StatusCode::SUCCESS; +} + + +StatusCode +FileStagerAlg::finalize() +{ + MsgStream log(msgSvc(), name()); + log << MSG::DEBUG << name() << "Finalize()" << endreq; + log << MSG::WARNING << "Total wait time = " << _waittime << " s." 
<< endreq; + + if (m_storeStats) { + std::string fileName = "filestager_stats.root"; + log << MSG::WARNING << "Writing filestager statistics to file <" << fileName << ">" << endreq; + TFile *ff = new TFile(fileName.c_str(),"recreate"); + _waithist->Write(); + ff->Close(); + delete ff; + } + + // release remaining staged files in destructor + + log << MSG::DEBUG << name() << "Finalize() successful" << endreq; + return StatusCode::SUCCESS; +} + + +void +FileStagerAlg::configStager() +{ + MsgStream log(msgSvc(), name()); + log << MSG::DEBUG << "configStager()" << endreq; + + // itr for looping over input files + _fItr = m_inCollection.begin(); + + // stager settings + TStageManager& manager(TStageManager::instance()); + + manager.setPipeLength(m_pipeLength); + manager.verbose(m_verbose); + manager.verboseWait(m_verboseWait); + manager.firstFileAlreadyStaged(m_firstFileAlreadyStaged); + manager.keepLogfiles(m_keepLogfiles); + + if (!m_infilePrefix.empty()) + manager.setInfilePrefix(m_infilePrefix.c_str()); + if (!m_outfilePrefix.empty()) + manager.setOutfilePrefix(m_outfilePrefix.c_str()); + if (!m_cpCommand.empty()) + manager.setCpCommand(m_cpCommand.c_str()); + if (!m_baseTmpdir.empty()) + manager.setBaseTmpdir(m_baseTmpdir.c_str()); + if (!m_logfileDir.empty()) + manager.setLogfileDir(m_logfileDir.c_str()); + + for (int i=0; i<int(m_cpArg.size()); ++i) + manager.addCpArg(m_cpArg[i].c_str()); +} + + +void +FileStagerAlg::loadStager() +{ + MsgStream log(msgSvc(), name()); + log << MSG::DEBUG << "loadStager()" << endreq; + + TStageManager& manager(TStageManager::instance()); + + m_outCollection.clear(); + std::vector< std::string >::iterator itr = m_inCollection.begin(); + + // ensure deletion of first staged file + if (!m_inCollection.empty()) + _prevFile = m_inCollection[0].c_str(); + + // add files and start staging ... + for (; itr!=m_inCollection.end(); ++itr) { + manager.addToList(itr->c_str()); + std::string outColl = manager.getTmpFilename(itr->c_str()); + m_outCollection.push_back( outColl ); + } +} + + +void +FileStagerAlg::releasePrevFile() +{ + // release previous file (not previous file) to avoid possible conflicts w/ pool + if (!_prevFile.empty()) { + TStageManager& manager(TStageManager::instance()); + manager.releaseFile(_prevFile.c_str()); + } + + if (_fItr!=m_inCollection.end()) { + _prevFile = *_fItr; + } +} + + +void +FileStagerAlg::setupNextFile() +{ + MsgStream log(msgSvc(), name()); + log << MSG::DEBUG << "setupNextFile()" << endreq; + + if (_fItr!=m_inCollection.end()) { + TStageManager& manager(TStageManager::instance()); + + // wait till file finishes staging ... + _stopwatch->Start(); + (void) manager.getFile(_fItr->c_str()); + _stopwatch->Stop(); + + // collect wait time statistics + _waithist->Fill(_stopwatch->RealTime()); + _waittime += _stopwatch->RealTime(); + + log << MSG::DEBUG << "Time to wait for <" << _fItr->c_str() << "> = " << _stopwatch->RealTime() << " s." 
<< endreq; + + ++_fItr; + } +} + + +void +FileStagerAlg::handle(const Incident& inc) +{ + MsgStream log(msgSvc(), name()); + + const FileIncident* fileInc = dynamic_cast<const FileIncident*>(&inc); + if (fileInc == 0) { + log << MSG::ERROR << " Unable to get FileName from BeginInputFile/EndInputFile incident" << endreq; + return; + } + + const std::string fileName = fileInc->fileName(); + log << MSG::DEBUG << "handle() " << inc.type() << " for " << fileName << endreq; + + if (inc.type() == "BeginInputFile") { + // do nothing + } else if (inc.type() == "EndInputFile") { + releasePrevFile(); + // do nothing with current file to avoid conflicts with pool + setupNextFile(); + } +} + diff --git a/Database/FileStager/src/components/FileStager_entries.cxx b/Database/FileStager/src/components/FileStager_entries.cxx new file mode 100644 index 0000000000000000000000000000000000000000..d273f237c4416c55bf61ebb54627a40e78c533e4 --- /dev/null +++ b/Database/FileStager/src/components/FileStager_entries.cxx @@ -0,0 +1,10 @@ +#include "FileStager/FileStagerAlg.h" + +#include "GaudiKernel/DeclareFactoryEntries.h" + +DECLARE_ALGORITHM_FACTORY( FileStagerAlg ) + +DECLARE_FACTORY_ENTRIES(FileStager) { + DECLARE_ALGORITHM( FileStagerAlg ) +} + diff --git a/Database/FileStager/src/components/FileStager_load.cxx b/Database/FileStager/src/components/FileStager_load.cxx new file mode 100644 index 0000000000000000000000000000000000000000..3cb8371756f18c2a1f70b98e064a8a532b572e33 --- /dev/null +++ b/Database/FileStager/src/components/FileStager_load.cxx @@ -0,0 +1,4 @@ +#include "GaudiKernel/LoadFactoryEntries.h" + +LOAD_FACTORY_ENTRIES(FileStager) +
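The job-options fragments in share/ all expect a sampleList or sampleFile to be defined before they are included. The following is a minimal driver sketch, not part of the package: the dataset paths and castor location are illustrative placeholders, and the 'gridcopy://' prefix follows FileStagerAlg's default InfilePrefix while the exact path layout is assumed.

## minimal driver jobOptions sketch, using the rfcp-based staging fragment
sampleList = [
    'gridcopy:///castor/cern.ch/some/dataset/AOD.000001.pool.root.1',
    'gridcopy:///castor/cern.ch/some/dataset/AOD.000002.pool.root.1',
]
EvtMax = -1
include("FileStager/input_FileStagerRFCP.py")
## ... followed by the reconstruction/analysis setup, for example as in
## FileStager_exampleJobOptions.py, which ends by including RecExCommon_topOptions.py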