#!/usr/bin/env python
"""
_RuntimeSAMStageOut_

Test script for using TFC to determine local stage out is working
properly at a site

"""

import time
import os
import sys

from WMCore.Storage.SiteLocalConfig import loadSiteLocalConfig
from WMCore.Storage.Registry import retrieveStageOutImpl

# NOTE(review): these two modules are not referenced by name below;
# presumably they are imported for their side effects (making stage-out
# implementations known to retrieveStageOutImpl) -- confirm before removing.
import WMCore.Storage.Backends
import WMCore.Storage.Plugins


class StageOutDiagnostic:
    """
    _StageOutDiagnostic_

    Object to test the local stage out details step by step.

    Calling the instance runs four steps in order (site conf, TFC,
    stage out, clean up) and stops at the first one that fails.

    Attributes:
      summary   - dict mapping step name to a short result string
      status    - 0 if every step passed, otherwise the 1-based number
                  of the step that failed (1=SiteConf, 2=TFC,
                  3=LocalStageOut, 4=CleanUp); used as the exit code
      siteConf  - object returned by loadSiteLocalConfig()
      tfc       - object returned by siteConf.trivialFileCatalog()
      datestamp - local time string with " " -> "-" and ":" -> "_"
      testLFN   - LFN the test file is staged out to; unique per
                  process id and datestamp

    NOTE(review): a step that succeeds leaves its summary entry at
    "NotRun" (SiteConf, TFC, primary stage out) or sets it to ""
    (fallback stage out, clean up).  That is the historical output and
    is left untouched in case something parses it.
    """

    # Name of the scratch file created in the current working directory
    _TEST_FILE = "TEST-FILE"

    def __init__(self):
        self.summary = {
            'SiteConf': "NotRun",
            'TFC': "NotRun",
            'LocalStageOut': "NotRun",
            'CleanUp': "NotRun",
        }
        self.status = 0
        self.siteConf = None
        self.tfc = None
        self.datestamp = time.asctime(time.localtime(time.time()))
        self.datestamp = self.datestamp.replace(" ", "-").replace(":", "_")
        self.testLFN = "/store/unmerged/SAM/StageOutTest-%i-%s" % (
            os.getpid(), self.datestamp)

    def __call__(self):
        """
        _operator()_

        Invoke step by step tests and create the summary.

        Each step is only attempted if the previous one passed.  When a
        step raises, its message is printed and self.status is set to
        that step's number.  The summary is printed exactly once, on
        success or failure.
        """
        steps = (
            (1, self.testSiteConf),
            (2, self.testTFC),
            (3, self.testLocalStageOut),
            (4, self.testCleanUp),
        )
        for failureStatus, step in steps:
            try:
                step()
            except Exception as ex:
                print(str(ex))
                self.status = failureStatus
                break

        self.complete()
        return

    def complete(self):
        """
        _complete_

        Print summary after tests: an overall status line followed by
        one line per entry in self.summary.
        """
        msg = "==== StageOut Test Summary ====\n"
        if self.status != 0:
            msg += "Status: FAILED: %s\n" % self.status
        else:
            msg += "Test Successful\n"

        for key, val in self.summary.items():
            msg += " Test: %s : %s\n" % (key, val)

        print(msg)
        return

    def testSiteConf(self):
        """
        _testSiteConf_

        Test reading the site conf file.

        Requires CMS_PATH in the environment, loads the site local
        config into self.siteConf and checks that the local-stage-out
        'command', 'se-name' and 'catalog' settings are not None.

        Raises RuntimeError (after recording the reason in
        self.summary['SiteConf']) if any of those checks fails.
        """
        if "CMS_PATH" not in os.environ:
            msg = "CMS_PATH Not Set: Cannot find SiteConf"
            self.summary['SiteConf'] = "Failed: CMS_PATH not set"
            raise RuntimeError(msg)

        try:
            self.siteConf = loadSiteLocalConfig()
        except Exception as ex:
            msg = "Error loading Site Conf File: %s" % str(ex)
            self.summary['SiteConf'] = "Failed: Cannot load SiteConf"
            raise RuntimeError(msg)

        # (config key, label used in the error message)
        requiredSettings = (
            ('command', "Command"),
            ('se-name', "SE Name"),
            ('catalog', "Catalog"),
        )
        for key, label in requiredSettings:
            if self.siteConf.localStageOut[key] is None:
                msg = "LocalStageOut %s is not set" % label
                self.summary['SiteConf'] = \
                    "Failed: local-stage-out %s not set" % key
                raise RuntimeError(msg)

        msg = "SiteConf test successful:\n"
        for key, val in self.siteConf.localStageOut.items():
            msg += " %s = %s\n" % (key, val)

        print(msg)
        return

    def testTFC(self):
        """
        _testTFC_

        Test the Trivial File Catalog.

        Loads the catalog from self.siteConf into self.tfc and maps a
        sample LFN to a PFN with the catalog's preferred protocol.

        Raises RuntimeError (after recording the reason in
        self.summary['TFC']) if the catalog cannot be loaded or the
        sample LFN cannot be translated.
        """
        try:
            self.tfc = self.siteConf.trivialFileCatalog()
        except Exception as ex:
            msg = "Failed to load Trivial File Catalog: %s" % str(ex)
            self.summary['TFC'] = "Failed: Cannot load TFC"
            raise RuntimeError(msg)

        sampleLFN = "/store/unmerged/testLFN"
        try:
            samplePFN = self.tfc.matchLFN(self.tfc.preferredProtocol,
                                          sampleLFN)
        except Exception as ex:
            msg = "Failed to translate LFN: %s" % str(ex)
            self.summary['TFC'] = "Failed: Cannot translate LFN to PFN"
            raise RuntimeError(msg)

        msg = "TFC test successful:\n"
        msg += "Mapped LFN: %s\n To PFN: %s\n" % (sampleLFN, samplePFN)

        print(msg)
        return

    def _attemptStageOut(self, command, protocol, sourcePFN, targetPFN,
                         options):
        """
        Try one stage out with the implementation registered for command.

        Returns a (failure, detail) pair.  failure is None on success,
        "retrieve" if the implementation could not be retrieved, or
        "transfer" if invoking it raised.  detail is the text to append
        to the caller's error message ("" on success).
        """
        try:
            impl = retrieveStageOutImpl(command)
        except Exception:
            detail = "Unable to retrieve impl for local stage out:\n"
            detail += "Error retrieving StageOutImpl for command named: %s\n" % (
                command,)
            return "retrieve", detail

        try:
            impl.retryPause = 15
            impl(protocol, sourcePFN, targetPFN, options)
        except Exception as ex:
            detail = "Failure for local stage out:\n"
            detail += str(ex)
            return "transfer", detail

        return None, ""

    def testLocalStageOut(self):
        """
        _testLocalStageOut_

        Test a local stage out.

        Writes a 1000-line scratch file in the current directory and
        stages it out to self.testLFN with the local-stage-out command.
        If that fails, every entry of self.siteConf.fallbackStageOut is
        tried in order until one succeeds.

        Raises RuntimeError carrying the accumulated error text when
        the primary attempt and all fallbacks fail.
        """
        msg = ""

        # "with" guarantees the handle is closed even if a write fails
        with open(self._TEST_FILE, 'w') as handle:
            for _ in range(1000):
                handle.write("This is a test file\n")

        sourcePFN = os.path.join(os.getcwd(), self._TEST_FILE)
        command = self.siteConf.localStageOut['command']
        options = self.siteConf.localStageOut.get('option', None)
        protocol = self.tfc.preferredProtocol
        targetPFN = self.tfc.matchLFN(protocol, self.testLFN)

        # first try the regular stageout
        failure, detail = self._attemptStageOut(command, protocol, sourcePFN,
                                                targetPFN, options)
        if failure is None:
            return

        msg += detail
        if failure == "retrieve":
            self.summary['LocalStageOut'] = \
                "Failure: Cant retrieve StageOut Impl"
        else:
            self.summary['LocalStageOut'] = \
                "Failure: Local Stage Out Failed"

        ### FALLBACK ###
        ### there are N fallbacks in a list called fallbackStageOut ###
        # A failing fallback is recorded and the next one is tried;
        # raising here would skip every fallback after the first failure.
        for fallbackCount, fallback in enumerate(self.siteConf.fallbackStageOut):
            command = fallback['command']
            options = fallback.get('option', None)
            # Fallback targets are built from the lfn-prefix, not the TFC
            if fallback['lfn-prefix']:
                targetPFN = fallback['lfn-prefix'] + self.testLFN
            else:
                targetPFN = self.testLFN

            failure, detail = self._attemptStageOut(command, protocol,
                                                    sourcePFN, targetPFN,
                                                    options)
            if failure is None:
                self.summary['LocalStageOut'] = ""
                return

            msg += detail
            if failure == "retrieve":
                self.summary['LocalStageOut'] += \
                    "\nFailure: Cant retrieve StageOut Impl for fallback %s" % fallbackCount
            else:
                self.summary['LocalStageOut'] += \
                    "\nFailure: Fallback %s Stage Out Failed" % fallbackCount

        # If we got here, nothing worked
        raise RuntimeError(msg)

    def testCleanUp(self):
        """
        _testCleanUp_

        Test clean up of the temp file from SE.

        Removes the local scratch file, then tries to remove the staged
        out file using the local-stage-out command first and each
        fallback command after it, stopping at the first removal that
        does not raise.

        Raises RuntimeError carrying the accumulated error text when
        no candidate succeeds.
        """
        os.remove(self._TEST_FILE)

        # (command, pfn) candidates: primary first, then the fallbacks
        candidates = [
            (self.siteConf.localStageOut['command'],
             self.tfc.matchLFN(self.tfc.preferredProtocol, self.testLFN)),
        ]
        for fallback in self.siteConf.fallbackStageOut:
            if fallback['lfn-prefix']:
                fallbackPFN = fallback['lfn-prefix'] + self.testLFN
            else:
                fallbackPFN = self.testLFN
            candidates.append((fallback['command'], fallbackPFN))

        msg = ""
        for command, pfn in candidates:
            try:
                implInstance = retrieveStageOutImpl(command)
            except Exception:
                msg += "Unable to retrieve impl for clean up:\n"
                msg += "Error retrieving StageOutImpl for command named: %s\n" % (command,)
                self.summary['CleanUp'] = "Failure: Cant retrieve StageOut Impl"
                msg += "Trying Fallback...\n"
                continue

            # Invoke StageOut Impl removeFile method
            try:
                implInstance.removeFile(pfn)
            except Exception as ex:
                msg += "Error performing Cleanup command for impl "
                msg += "%s\n" % command
                msg += "On PFN: %s\n" % pfn
                msg += str(ex)
                self.summary['CleanUp'] = "Failure: Cleanup operation Failed"
                msg += "Trying Fallback...\n"
                continue

            # This candidate worked
            self.summary['CleanUp'] = ""
            return

        # nothing worked, bomb out
        raise RuntimeError(msg)


if __name__ == '__main__':
    diagnostic = StageOutDiagnostic()
    diagnostic()
    sys.exit(diagnostic.status)