TrigCostSvc.cxx 21.8 KB
Newer Older
Tim Martin's avatar
Tim Martin committed
1
/*
Aleksandra Poreba's avatar
Aleksandra Poreba committed
2
  Copyright (C) 2002-2021 CERN for the benefit of the ATLAS collaboration
Tim Martin's avatar
Tim Martin committed
3
4
*/

5
#include "AthenaKernel/SlotSpecificObj.h"
6
#include "TrigConfHLTUtils/HLTUtils.h"
7

8
#include "TrigCostSvc.h"
9

10
11
#include <mutex>  // For std::unique_lock

Tim Martin's avatar
Tim Martin committed
12
13
/////////////////////////////////////////////////////////////////////////////

14
TrigCostSvc::TrigCostSvc(const std::string& name, ISvcLocator* pSvcLocator) :
15
base_class(name, pSvcLocator), // base_class = AthService
16
m_eventSlots(),
17
m_eventMonitored(),
18
m_slotMutex(),
19
m_globalMutex(),
20
m_algStartInfo(),
21
m_algStopTime(),
22
23
24
m_threadToAlgMap(),
m_threadToCounterMap(),
m_threadCounter(0)
25
{
26
  ATH_MSG_DEBUG("TrigCostSvc regular constructor");
Tim Martin's avatar
Tim Martin committed
27
28
29
30
}

// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *

31
TrigCostSvc::~TrigCostSvc() {
32
  // delete[] m_eventMonitored;
33
  ATH_MSG_DEBUG("TrigCostSvc destructor()");
Tim Martin's avatar
Tim Martin committed
34
35
36
37
38
}

// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *


39
40
StatusCode TrigCostSvc::initialize() {
  ATH_MSG_DEBUG("TrigCostSvc initialize()");
41
  m_eventSlots = SG::getNSlots();
42
43
44
45
46
47
  // TODO Remove this when the configuration is correctly propagated in config-then-run jobs
  if (!m_eventSlots) {
    ATH_MSG_WARNING("numConcurrentEvents() == 0. This is a misconfiguration, probably coming from running from pickle. "
      "Setting local m_eventSlots to a 'large' number until this is fixed to allow the job to proceed.");
    m_eventSlots = 100;
  }
48
  ATH_MSG_INFO("Initializing TrigCostSvc with " << m_eventSlots << " event slots");
49

50
51
  // We cannot have a vector here as atomics are not movable nor copyable. Unique heap arrays are supported by C++
  m_eventMonitored = std::make_unique< std::atomic<bool>[] >( m_eventSlots );
52
53
  m_slotMutex = std::make_unique< std::shared_mutex[] >( m_eventSlots );

54
55
  for (size_t i = 0; i < m_eventSlots; ++i) m_eventMonitored[i] = false;

56
  ATH_CHECK(m_algStartInfo.initialize(m_eventSlots));
57
  ATH_CHECK(m_algStopTime.initialize(m_eventSlots));
58
  ATH_CHECK(m_rosData.initialize(m_eventSlots));
59
60
61
62
63
64

  return StatusCode::SUCCESS;
}

// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *

65
66
StatusCode TrigCostSvc::finalize() {
  ATH_MSG_DEBUG("TrigCostSvc finalize()");
67
68
69
70
  if (m_saveHashes) {
    TrigConf::HLTUtils::hashes2file();
    ATH_MSG_INFO("Calling hashes2file, saving dump of job's HLT hashing dictionary to disk.");
  }
71
72
73
74
  return StatusCode::SUCCESS;
}

// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
Tim Martin's avatar
Tim Martin committed
75

76
StatusCode TrigCostSvc::startEvent(const EventContext& context, const bool enableMonitoring) {
77
  const bool monitoredEvent = (enableMonitoring || m_monitorAllEvents);
78
  ATH_CHECK(checkSlot(context));
79
80
81

  m_eventMonitored[ context.slot() ] = false;

82
83
84
85
86
87
88
  {
    // "clear" is a whole table operation, we need it all to ourselves
    std::unique_lock lockUnique( m_slotMutex[ context.slot() ] );
    if (monitoredEvent) {
      // Empty transient thread-safe stores in preparation for recording this event's cost data
      ATH_CHECK(m_algStartInfo.clear(context, msg()));
      ATH_CHECK(m_algStopTime.clear(context, msg()));
89
      ATH_CHECK(m_rosData.clear(context, msg()));
90
91
92
93
    }

    // Enable collection of data in this slot for monitoredEvents
    m_eventMonitored[ context.slot() ] = monitoredEvent;
94
  }
95

Rafal Bielski's avatar
Rafal Bielski committed
96
  // As we missed the AuditType::Before of the HLTSeeding (which is calling this TrigCostSvc::startEvent), let's add it now.
97
  // This will be our canonical initial timestamps for measuring this event. Similar will be done for DecisionSummaryMakerAlg at the end
Rafal Bielski's avatar
Rafal Bielski committed
98
  ATH_CHECK(processAlg(context, m_hltSeedingName, AuditType::Before));
99

Tim Martin's avatar
Tim Martin committed
100
101
102
103
104
  return StatusCode::SUCCESS;
}

// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *

105
StatusCode TrigCostSvc::processAlg(const EventContext& context, const std::string& caller, const AuditType type) {
106
  ATH_CHECK(checkSlot(context));
107

108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
  TrigTimeStamp now;

  // Do per-event within-slot monitoring
  if (m_eventMonitored[ context.slot() ]) {
    // Multiple simultaneous calls allowed here, adding their data to the concurrent map.
    std::shared_lock lockShared( m_slotMutex[ context.slot() ] );

    AlgorithmIdentifier ai = AlgorithmIdentifierMaker::make(context, caller, msg());
    ATH_CHECK( ai.isValid() );

    ATH_CHECK(monitor(context, ai, now, type));

    ATH_MSG_VERBOSE("Caller '" << caller << "', '" << ai.m_store << "', slot:" << context.slot() << " "
      << (type == AuditType::Before ? "BEGAN" : "ENDED") << " at " << now.microsecondsSinceEpoch());
  }

  // MultiSlot mode: do per-event monitoring of all slots, but saving the data within the master-slot
  if (m_enableMultiSlot && context.slot() != m_masterSlot && m_eventMonitored[ m_masterSlot ]) {
    std::shared_lock lockShared( m_slotMutex[ m_masterSlot ] );

    // Note: we override the storage location of these data from all other slots to be saved in the MasterSlot
    AlgorithmIdentifier ai = AlgorithmIdentifierMaker::make(context, caller, msg(), m_masterSlot);
    ATH_CHECK( ai.isValid() );

    ATH_CHECK(monitor(context, ai, now, type));
  }

  return StatusCode::SUCCESS;
}

// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *

140
StatusCode TrigCostSvc::monitor(const EventContext& context, const AlgorithmIdentifier& ai, const TrigTimeStamp& now, const AuditType type) {
141

142
  if (type == AuditType::Before) {
143

144
    AlgorithmPayload ap {
145
      now,
146
147
148
149
      std::this_thread::get_id(),
      getROIID(context),
      static_cast<uint32_t>(context.slot())
    };
150
    ATH_CHECK( m_algStartInfo.insert(ai, ap) );
151

152
    // Cache the AlgorithmIdentifier which has just started executing on this thread
153
154
155
156
157
    if (ai.m_realSlot == ai.m_slotToSaveInto) {
      tbb::concurrent_hash_map<std::thread::id, AlgorithmIdentifier, ThreadHashCompare>::accessor acc;
      m_threadToAlgMap.insert(acc, ap.m_algThreadID);
      acc->second = ai;
    }
158

159
160
  } else if (type == AuditType::After) {

161
162
163
    ATH_CHECK( m_algStopTime.insert(ai, now) );

  } else {
164

165
166
    ATH_MSG_ERROR("Only expecting AuditType::Before or AuditType::After");
    return StatusCode::FAILURE;
167
168
169

  }

Tim Martin's avatar
Tim Martin committed
170
  return StatusCode::SUCCESS;
171
172
}

Tim Martin's avatar
Tim Martin committed
173
174
175

// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *

176
StatusCode TrigCostSvc::monitorROS(const EventContext& /*context*/, robmonitor::ROBDataMonitorStruct payload){
177
178
179
180
181
182
183
184
185
186
187
  ATH_MSG_DEBUG( "Received ROB payload " << payload );

  // Associate payload with an algorithm
  AlgorithmIdentifier theAlg;
  {
    tbb::concurrent_hash_map<std::thread::id, AlgorithmIdentifier, ThreadHashCompare>::const_accessor acc;
    ATH_CHECK( m_threadToAlgMap.find(acc, std::this_thread::get_id()) );
    theAlg = acc->second;
  }

  // Record data in TrigCostDataStore
Aleksandra Poreba's avatar
Aleksandra Poreba committed
188
  ATH_MSG_DEBUG( "Adding ROBs from" << payload.requestor_name << " to " << theAlg.m_hash );
189
  ATH_CHECK( m_rosData.push_back(theAlg, std::move(payload)) );
190
191
192
193
194
195
196

  return StatusCode::SUCCESS;
}


// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *

197
StatusCode TrigCostSvc::endEvent(const EventContext& context, SG::WriteHandle<xAOD::TrigCompositeContainer>& costOutputHandle, SG::WriteHandle<xAOD::TrigCompositeContainer>& rosOutputHandle) { 
198
  ATH_CHECK(checkSlot(context));
199
200
201
202
203
  if (m_eventMonitored[ context.slot() ] == false) {
    // This event was not monitored - nothing to do.
    ATH_MSG_DEBUG("Not a monitored event.");
    return StatusCode::SUCCESS;
  }
204

205
  // As we will miss the AuditType::After of the DecisionSummaryMakerAlg (which is calling this TrigCostSvc::endEvent), let's add it now.
Rafal Bielski's avatar
Rafal Bielski committed
206
  // This will be our canonical final timestamps for measuring this event. Similar was done for HLTSeeding at the start
207
208
  ATH_CHECK(processAlg(context, m_decisionSummaryMakerAlgName, AuditType::After));

209
  // Reset eventMonitored flags
210
  m_eventMonitored[ context.slot() ] = false;
211
212

  // Now that this atomic is set to FALSE, additional algs in this instance which trigger this service will 
213
  // not be able to call TrigCostSvc::monitor
214
215
216
217
218
219

  // ... but processAlg might already be running in other threads... 
  // Wait to obtain an exclusive lock.
  std::unique_lock lockUnique( m_slotMutex[ context.slot() ] );
  
  // we can now perform whole-map inspection of this event's TrigCostDataStores without the danger that it will be changed further
220

221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
  // Let's start by getting the global STOP time we just wrote
  uint64_t eventStopTime = 0;
  {
    const AlgorithmIdentifier myAi = AlgorithmIdentifierMaker::make(context, m_decisionSummaryMakerAlgName, msg());
    ATH_CHECK( myAi.isValid() );
    tbb::concurrent_hash_map<AlgorithmIdentifier, TrigTimeStamp, AlgorithmIdentifierHashCompare>::const_accessor stopTimeAcessor;
    if (m_algStopTime.retrieve(myAi, stopTimeAcessor).isFailure()) {
      ATH_MSG_ERROR("No end time for '" << myAi.m_caller << "', '" << myAi.m_store << "'"); // Error as we JUST entered this info!
    } else { // retrieve was a success
      eventStopTime = stopTimeAcessor->second.microsecondsSinceEpoch();
    }
  }

  // And the global START time for the event
  uint64_t eventStartTime = 0;
  {
Rafal Bielski's avatar
Rafal Bielski committed
237
238
    const AlgorithmIdentifier hltSeedingAi = AlgorithmIdentifierMaker::make(context, m_hltSeedingName, msg());
    ATH_CHECK( hltSeedingAi.isValid() );
239
    tbb::concurrent_hash_map<AlgorithmIdentifier, AlgorithmPayload, AlgorithmIdentifierHashCompare>::const_accessor startAcessor;
Rafal Bielski's avatar
Rafal Bielski committed
240
241
    if (m_algStartInfo.retrieve(hltSeedingAi, startAcessor).isFailure()) {
      ATH_MSG_ERROR("No alg info for '" << hltSeedingAi.m_caller << "', '" << hltSeedingAi.m_store << "'"); // Error as we know this info must be present
242
243
244
245
246
    } else { // retrieve was a success
      eventStartTime = startAcessor->second.m_algStartTime.microsecondsSinceEpoch();
    }
  }

247
  // Read payloads. Write to persistent format
248
249
250
251
  tbb::concurrent_hash_map< AlgorithmIdentifier, AlgorithmPayload, AlgorithmIdentifierHashCompare>::const_iterator beginIt;
  tbb::concurrent_hash_map< AlgorithmIdentifier, AlgorithmPayload, AlgorithmIdentifierHashCompare>::const_iterator endIt;
  tbb::concurrent_hash_map< AlgorithmIdentifier, AlgorithmPayload, AlgorithmIdentifierHashCompare>::const_iterator it;
  ATH_CHECK(m_algStartInfo.getIterators(context, msg(), beginIt, endIt));
252

253
254
  ATH_MSG_DEBUG("Monitored event with " << std::distance(beginIt, endIt) << " AlgorithmPayload objects.");

255
  std::map<size_t, size_t> aiToHandleIndex;
256
  for (it = beginIt; it != endIt; ++it) {
257
    const AlgorithmIdentifier& ai = it->first;
258
    const AlgorithmPayload& ap = it->second;
259
    uint64_t startTime = ap.m_algStartTime.microsecondsSinceEpoch();
260

261
262
    // Can we find the end time for this alg? If not, it is probably still running. Hence we use "now" as the default time.
    uint64_t stopTime = eventStopTime;
263
264
265
    {
      tbb::concurrent_hash_map<AlgorithmIdentifier, TrigTimeStamp, AlgorithmIdentifierHashCompare>::const_accessor stopTimeAcessor;
      if (m_algStopTime.retrieve(ai, stopTimeAcessor).isFailure()) {
266
        ATH_MSG_DEBUG("No end time for '" << ai.m_caller << "', '" << ai.m_store << "'");
267
268
      } else { // retrieve was a success
        stopTime = stopTimeAcessor->second.microsecondsSinceEpoch();
269
      }
270
      // stopTimeAcessor goes out of scope - lock released
271
272
    }

273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
    // It is possible (when in the master-slot) to catch just the END of an Alg's exec from another slot, and then the START of the same
    // alg executing in the next event in that same other-slot.
    // This gives us an end time which is before the start time. Disregard these entries.
    if (startTime > stopTime) {
      ATH_MSG_VERBOSE("Disregard start-time:" << startTime << " > stop-time:" << stopTime 
        << " for " << TrigConf::HLTUtils::hash2string( ai.callerHash(), "ALG") << " in slot " << ap.m_slot << ", this is slot " << context.slot());
      continue;
    }

    // Lock the start and stop times to be no later than eventStopTime.
    // E.g. it's possible for an alg in another slot to start or stop running after 'processAlg(context, m_decisionSummaryMakerAlgName, AuditType::After))'
    // but before 'lockUnique( m_slotMutex[ context.slot() ] )', creating a timestamp after the nominal end point for this event.
    // If the alg starts afterwards, we disregard it in lieu of setting to have zero walltime.
    // If the alg stops afterwards, we truncate its stop time to be no later than eventStopTime
    if (startTime > eventStopTime) {
      ATH_MSG_VERBOSE("Disregard " << TrigConf::HLTUtils::hash2string( ai.callerHash(), "ALG") << " as it started after endEvent() was finished being called" );
      continue;
    }
    if (stopTime > eventStopTime) {
      ATH_MSG_VERBOSE(TrigConf::HLTUtils::hash2string( ai.callerHash(), "ALG") << " stopped after endEvent() was called, but before the cost container was locked," 
        << " truncating its ending time stamp from " << stopTime << " to " << eventStopTime);
      stopTime = eventStopTime;
    }

    // Do the same, locking the start and stop times to be no earlier than eventStartTime
    // If the alg stops before eventStartTime, we disregard it in lieu of setting it to have zero walltime
    // If the alg starts before eventStartTime, we truncate its start time to be no later than eventStopTime
    if (stopTime < eventStartTime) {
      ATH_MSG_VERBOSE("Disregard " << TrigConf::HLTUtils::hash2string( ai.callerHash(), "ALG") << " as it stopped before startEvent() was finished being called" );
      continue;
    }
    if (startTime < eventStartTime) {
Rafal Bielski's avatar
Rafal Bielski committed
305
      ATH_MSG_VERBOSE(TrigConf::HLTUtils::hash2string( ai.callerHash(), "ALG") << " started just after the cost container was unlocked, but before the HLTSeeding record was written." 
306
307
308
309
        << " truncating its starting time stamp from " << startTime << " to " << eventStartTime);
      startTime = eventStartTime;
    }

310
311
    // Make a new TrigComposite to persist monitoring payload for this alg
    xAOD::TrigComposite* tc = new xAOD::TrigComposite();
312
    costOutputHandle->push_back( tc ); 
313
314
    // tc is now owned by storegate and, and has an aux store provided by the TrigCompositeCollection

315
    const uint32_t threadID = static_cast<uint32_t>( std::hash< std::thread::id >()(ap.m_algThreadID) );
316
317
318
319
320
321
322
323
324
325
326
    uint32_t threadEnumerator = 0; 
    {
      // We can have multiple slots get here at the same time
      std::lock_guard<std::mutex> lock(m_globalMutex);
      const std::unordered_map<uint32_t, uint32_t>::const_iterator mapIt = m_threadToCounterMap.find(threadID);
      if (mapIt == m_threadToCounterMap.end()) {
        threadEnumerator = m_threadCounter;
        m_threadToCounterMap.insert( std::make_pair(threadID, m_threadCounter++) );
      } else {
        threadEnumerator = mapIt->second;
      }
327
328
    }

329
    bool result = true;
330
331
    result &= tc->setDetail("alg", ai.callerHash());
    result &= tc->setDetail("store", ai.storeHash());
332
    result &= tc->setDetail("view", ai.m_viewID);
333
    result &= tc->setDetail("thread", threadEnumerator);
334
    result &= tc->setDetail("thash", threadID);
335
    result &= tc->setDetail("slot", ap.m_slot);
336
    result &= tc->setDetail("roi", ap.m_algROIID);
Tim Martin's avatar
Tim Martin committed
337
    result &= tc->setDetail("start", startTime);
338
    result &= tc->setDetail("stop", stopTime);
339
    if (!result) ATH_MSG_WARNING("Failed to append one or more details to trigger cost TC");
340
341
342
343

    aiToHandleIndex[ai.m_hash] = costOutputHandle->size() - 1;
  }

Aleksandra Poreba's avatar
Aleksandra Poreba committed
344
  typedef tbb::concurrent_hash_map< AlgorithmIdentifier, std::vector<robmonitor::ROBDataMonitorStruct>, AlgorithmIdentifierHashCompare>::const_iterator ROBConstIt;
345
346
347
348
349
350
351
  ROBConstIt beginRob;
  ROBConstIt endRob;
  
  ATH_CHECK(m_rosData.getIterators(context, msg(), beginRob, endRob));
  
  for (ROBConstIt it = beginRob; it != endRob; ++it) {
    size_t aiHash = it->first.m_hash;
352

353
    if (aiToHandleIndex.count(aiHash) == 0) {
354
      ATH_MSG_WARNING("Algorithm with hash " << aiHash << " not found!");
355
356
    }

357
    // Save ROB data via TrigComposite
Aleksandra Poreba's avatar
Aleksandra Poreba committed
358
    for (const robmonitor::ROBDataMonitorStruct& robData : it->second) {
359
360
361
      xAOD::TrigComposite* tc = new xAOD::TrigComposite();
      rosOutputHandle->push_back(tc); 

Aleksandra Poreba's avatar
Aleksandra Poreba committed
362
363
364
365
      // Retrieve ROB requests data into primitives vectors
      std::vector<uint32_t> robs_id;
      std::vector<uint32_t> robs_size;
      std::vector<unsigned> robs_history;
Aleksandra Poreba's avatar
Aleksandra Poreba committed
366
      std::vector<unsigned short> robs_status;
Aleksandra Poreba's avatar
Aleksandra Poreba committed
367
368
369
370
371
372
373
374
375
376

      robs_id.reserve(robData.requested_ROBs.size());
      robs_size.reserve(robData.requested_ROBs.size());
      robs_history.reserve(robData.requested_ROBs.size());
      robs_status.reserve(robData.requested_ROBs.size());

      for (const auto& rob : robData.requested_ROBs) {
        robs_id.push_back(rob.second.rob_id);
        robs_size.push_back(rob.second.rob_size);
        robs_history.push_back(rob.second.rob_history);
377
        robs_status.push_back(rob.second.isStatusOk());
Aleksandra Poreba's avatar
Aleksandra Poreba committed
378
379
      }

380
381
      bool result = true;
      result &= tc->setDetail("alg_idx", aiToHandleIndex[aiHash]);
Aleksandra Poreba's avatar
Aleksandra Poreba committed
382
383
384
385
      result &= tc->setDetail("lvl1ID", robData.lvl1ID);
      result &= tc->setDetail<std::vector<uint32_t>>("robs_id", robs_id);
      result &= tc->setDetail<std::vector<uint32_t>>("robs_size", robs_size);
      result &= tc->setDetail<std::vector<unsigned>>("robs_history", robs_history);
Aleksandra Poreba's avatar
Aleksandra Poreba committed
386
      result &= tc->setDetail<std::vector<unsigned short>>("robs_status", robs_status);
Aleksandra Poreba's avatar
Aleksandra Poreba committed
387
388
      result &= tc->setDetail("start", robData.start_time);
      result &= tc->setDetail("stop", robData.end_time);
389
390
391

      if (!result) ATH_MSG_WARNING("Failed to append one or more details to trigger cost ROS TC");
    }
392
393
  }

394
  if (msg().level() <= MSG::VERBOSE) {
395
    ATH_MSG_VERBOSE("--- Trig Cost Event Summary ---");
396
    for ( const xAOD::TrigComposite* tc : *costOutputHandle ) {
397
398
399
      ATH_MSG_VERBOSE("Algorithm:'" << TrigConf::HLTUtils::hash2string( tc->getDetail<TrigConf::HLTHash>("alg"), "ALG") << "'");
      ATH_MSG_VERBOSE("  Store:'" << TrigConf::HLTUtils::hash2string( tc->getDetail<TrigConf::HLTHash>("store"), "STORE") << "'");
      ATH_MSG_VERBOSE("  View ID:" << tc->getDetail<int16_t>("view"));
400
401
      ATH_MSG_VERBOSE("  Thread #:" << tc->getDetail<uint32_t>("thread") );
      ATH_MSG_VERBOSE("  Thread ID Hash:" << tc->getDetail<uint32_t>("thash") );
402
403
404
405
      ATH_MSG_VERBOSE("  Slot:" << tc->getDetail<uint32_t>("slot") );
      ATH_MSG_VERBOSE("  RoI ID Hash:" << tc->getDetail<int32_t>("roi") );
      ATH_MSG_VERBOSE("  Start Time:" << tc->getDetail<uint64_t>("start") << " mu s");
      ATH_MSG_VERBOSE("  Stop Time:" << tc->getDetail<uint64_t>("stop") << " mu s");
406
407
    }
  }
408

409
  
Tim Martin's avatar
Tim Martin committed
410
411
412
413
  return StatusCode::SUCCESS;
}

// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
414

415
StatusCode TrigCostSvc::generateTimeoutReport(const EventContext& context, std::string& report) {
Aleksandra Poreba's avatar
Aleksandra Poreba committed
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460

  ATH_CHECK(checkSlot(context));
  if (!m_eventMonitored[context.slot()]) {
    ATH_MSG_DEBUG("Not a monitored event.");
    report = "";
    return StatusCode::SUCCESS;
  }

  std::unique_lock lockUnique(m_slotMutex[context.slot()]);

  tbb::concurrent_hash_map< AlgorithmIdentifier, AlgorithmPayload, AlgorithmIdentifierHashCompare>::const_iterator beginIt;
  tbb::concurrent_hash_map< AlgorithmIdentifier, AlgorithmPayload, AlgorithmIdentifierHashCompare>::const_iterator endIt;
  tbb::concurrent_hash_map< AlgorithmIdentifier, AlgorithmPayload, AlgorithmIdentifierHashCompare>::const_iterator it;
  ATH_CHECK(m_algStartInfo.getIterators(context, msg(), beginIt, endIt));

  // Create map that sorts in descending order
  std::map<uint64_t, std::string, std::greater<uint64_t>> timeToAlgMap;

  for (it = beginIt; it != endIt; ++it) {
    const AlgorithmIdentifier& ai = it->first;
    const AlgorithmPayload& ap = it->second;

    // Don't look at any records from other slots
    if (ai.m_realSlot != context.slot()) continue;

    uint64_t startTime = ap.m_algStartTime.microsecondsSinceEpoch();
    uint64_t stopTime = 0;
    {
      tbb::concurrent_hash_map<AlgorithmIdentifier, TrigTimeStamp, AlgorithmIdentifierHashCompare>::const_accessor stopTimeAcessor;
      if (m_algStopTime.retrieve(ai, stopTimeAcessor).isFailure()) {
        ATH_MSG_DEBUG("No end time for '" << ai.m_caller << "', '" << ai.m_store << "'");
      } else { // retrieve was a success
        stopTime = stopTimeAcessor->second.microsecondsSinceEpoch();
      }
      // stopTimeAcessor goes out of scope - lock released
    }

    if (stopTime == 0) continue; 
 
    timeToAlgMap[stopTime-startTime] = ai.m_caller;
  }

  // Save top 5 times to the report
  report = "Timeout detected with the following algorithms consuming the most time: ";
  int algCounter = 0;
461
  for(const std::pair<const uint64_t, std::string>& p : timeToAlgMap){
Aleksandra Poreba's avatar
Aleksandra Poreba committed
462
    // Save time in miliseconds instead of microseconds
463
    report += p.second + " (" + std::to_string(std::round(p.first/3.)) + " ms)";
Aleksandra Poreba's avatar
Aleksandra Poreba committed
464
465
466
467
468
469
470
471
472
473
474
475
    ++algCounter;
    if (algCounter >= 5){
      break;
    }
    report += ", ";
  }

  return StatusCode::SUCCESS;
}

// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *

476
StatusCode TrigCostSvc::checkSlot(const EventContext& context) const {
477
  if (context.slot() >= m_eventSlots) {
478
    ATH_MSG_FATAL("Job is using event slot #" << context.slot() << ", but we only reserved space for: " << m_eventSlots);
479
480
481
482
483
484
485
    return StatusCode::FAILURE;
  }
  return StatusCode::SUCCESS;
}

// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *

486
int32_t TrigCostSvc::getROIID(const EventContext& context) {
487
488
  if (Atlas::hasExtendedEventContext(context)) {
    const TrigRoiDescriptor* roi = Atlas::getExtendedEventContext(context).roiDescriptor();
489
490
491
492
493
494
    if (roi) return static_cast<int32_t>(roi->roiId());
  }
  return AlgorithmIdentifier::s_noView;
}

// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
495

496
bool TrigCostSvc::isMonitoredEvent(const EventContext& context, const bool includeMultiSlot) const {
497
498
499
  if (m_eventMonitored[ context.slot() ]) {
    return true;
  }
500
  if (includeMultiSlot && m_enableMultiSlot) {
501
502
503
504
505
506
507
    return m_eventMonitored[ m_masterSlot ];
  }
  return false;
}

// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *

508
size_t TrigCostSvc::ThreadHashCompare::hash(const std::thread::id& thread) {
509
510
511
512
513
  return static_cast<size_t>( std::hash< std::thread::id >()(thread) );
}

// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *

514
bool TrigCostSvc::ThreadHashCompare::equal(const std::thread::id& x, const std::thread::id& y) {
515
516
  return (x == y);
}