diff --git a/Control/PerformanceMonitoring/PerfMonComps/CMakeLists.txt b/Control/PerformanceMonitoring/PerfMonComps/CMakeLists.txt
index 7e335590382b9b60a73475aa6421e1a6d38e7565..5f6f8390ac89d402014237231c82f2187d35e1d4 100644
--- a/Control/PerformanceMonitoring/PerfMonComps/CMakeLists.txt
+++ b/Control/PerformanceMonitoring/PerfMonComps/CMakeLists.txt
@@ -9,6 +9,7 @@ find_package( Boost )
 find_package( Python COMPONENTS Development )
 find_package( ROOT COMPONENTS Core ROOTTPython
    cppyy${Python_VERSION_MAJOR}_${Python_VERSION_MINOR} )
+find_package( TBB )
 find_package( nlohmann_json )
 find_package( psutil )
 
@@ -16,9 +17,9 @@ find_package( psutil )
 atlas_add_component( PerfMonComps
    src/*.cxx src/components/*.cxx
    INCLUDE_DIRS ${AIDA_INCLUDE_DIRS} ${Boost_INCLUDE_DIRS} ${ROOT_INCLUDE_DIRS}
-   ${Python_INCLUDE_DIRS}
+   ${Python_INCLUDE_DIRS} ${TBB_INCLUDE_DIRS}
    LINK_LIBRARIES ${Boost_LIBRARIES} ${ROOT_LIBRARIES} ${Python_LIBRARIES}
-   ${CMAKE_DL_LIBS} AthenaBaseComps AthenaKernel RootUtilsPyROOT CxxUtils
+   ${CMAKE_DL_LIBS} ${TBB_LIBRARIES} AthenaBaseComps AthenaKernel RootUtilsPyROOT CxxUtils
    PerfMonEvent PerfMonKernel SGTools StoreGateLib GaudiKernel
    AthDSoCallBacks nlohmann_json::nlohmann_json)
 
diff --git a/Control/PerformanceMonitoring/PerfMonComps/src/PerfMonMTSvc.cxx b/Control/PerformanceMonitoring/PerfMonComps/src/PerfMonMTSvc.cxx
index d8e878ad9d689587f06d27334f946624fdbc4e4d..e873bf66c55fddaf6ea23a49011ea34ed929a599 100644
--- a/Control/PerformanceMonitoring/PerfMonComps/src/PerfMonMTSvc.cxx
+++ b/Control/PerformanceMonitoring/PerfMonComps/src/PerfMonMTSvc.cxx
@@ -22,6 +22,9 @@
 #include "boost/filesystem.hpp"
 #include "boost/format.hpp"
 
+// TBB
+#include "tbb/task_arena.h"
+
 /*
  * Constructor
  */
@@ -82,9 +85,8 @@ StatusCode PerfMonMTSvc::initialize() {
     ATH_MSG_INFO("  >> Component-level memory monitoring in the event-loop is disabled in jobs with more than 1 thread");
   }
 
-  // Slot specific component-level data map and locks
-  m_compLevelDataMapVec.resize(m_numberOfSlots); // Default construct
-  m_mutex_slots.resize(m_numberOfSlots); // Default construct
+  // Thread specific component-level data map
+  m_compLevelDataMapVec.resize(m_numberOfThreads+1); // Default construct
 
   // Set wall time offset
   m_eventLevelData.set_wall_time_offset(m_wallTimeOffset);
@@ -226,9 +228,8 @@ void PerfMonMTSvc::stopSnapshotAud(const std::string& stepName, const std::strin
  * Start Component Auditing
  */
 void PerfMonMTSvc::startCompAud(const std::string& stepName, const std::string& compName, const EventContext& ctx) {
-  // Lock for data integrity
-  const unsigned int islot = ctx.valid() ? ctx.slot() : 0;
-  std::lock_guard<std::mutex> lock(m_mutex_slots[islot]);
+  // Get the thread index
+  const unsigned int ithread = (ctx.valid() && tbb::this_task_arena::current_thread_index() > -1) ? tbb::this_task_arena::current_thread_index() : 0;
 
   // Memory measurement is only done outside the loop except when there is only a single thread
   const bool doMem = !ctx.valid() || (m_numberOfThreads == 1);
@@ -237,8 +238,8 @@ void PerfMonMTSvc::startCompAud(const std::string& stepName, const std::string&
   PMonMT::StepComp currentState = generate_state(stepName, compName);
 
   // Check if this is the first time calling if so create the mesurement data if not use the existing one.
-  // Metrics are collected per slot then aggregated before reporting
-  data_map_unique_t& compLevelDataMap = m_compLevelDataMapVec[islot];
+  // Metrics are collected per thread then aggregated before reporting
+  data_map_unique_t& compLevelDataMap = m_compLevelDataMapVec[ithread];
   if(compLevelDataMap.find(currentState) == compLevelDataMap.end()) {
     compLevelDataMap.insert({currentState, std::make_unique<PMonMT::ComponentData>()});
   }
@@ -250,7 +251,7 @@ void PerfMonMTSvc::startCompAud(const std::string& stepName, const std::string&
 
   // Debug
   ATH_MSG_DEBUG("Start Audit: ctx " << ctx.valid() << " evt " << ctx.evt() << " slot " << ctx.slot() <<
-                " component " << compName << " step " << stepName);
+                " thread index " << ithread << " component " << compName << " step " << stepName);
   ATH_MSG_DEBUG("Start CPU " << meas.cpu_time << " VMem " << meas.vmem << " Malloc " << meas.malloc);
 }
 
@@ -258,9 +259,8 @@ void PerfMonMTSvc::startCompAud(const std::string& stepName, const std::string&
  * Stop Component Auditing
  */
 void PerfMonMTSvc::stopCompAud(const std::string& stepName, const std::string& compName, const EventContext& ctx) {
-  // Lock for data integrity
-  const unsigned int islot = ctx.valid() ? ctx.slot() : 0;
-  std::lock_guard<std::mutex> lock(m_mutex_slots[islot]);
+  // Get the thread index
+  const unsigned int ithread = (ctx.valid() && tbb::this_task_arena::current_thread_index() > -1) ? tbb::this_task_arena::current_thread_index() : 0;
 
   // Memory measurement is only done outside the loop except when there is only a single thread
   const bool doMem = !ctx.valid() || (m_numberOfThreads == 1);
@@ -273,7 +273,7 @@ void PerfMonMTSvc::stopCompAud(const std::string& stepName, const std::string& c
   PMonMT::StepComp currentState = generate_state(stepName, compName);
 
   // Store
-  data_map_unique_t& compLevelDataMap = m_compLevelDataMapVec[islot];
+  data_map_unique_t& compLevelDataMap = m_compLevelDataMapVec[ithread];
   compLevelDataMap[currentState]->addPointStop(meas, doMem);
 
   // Once the first time IncidentProcAlg3 is excuted, toggle m_isFirstEvent to false.
@@ -286,7 +286,7 @@ void PerfMonMTSvc::stopCompAud(const std::string& stepName, const std::string& c
 
   // Debug
   ATH_MSG_DEBUG("Stop Audit: ctx " << ctx.valid() << " evt " << ctx.evt() << " slot " << ctx.slot() <<
-                " component " << compName << " step " << stepName);
+                " thread index " << ithread << " component " << compName << " step " << stepName);
   ATH_MSG_DEBUG("Stop CPU " << meas.cpu_time << " VMem " << meas.vmem << " Malloc " << meas.malloc);
   ATH_MSG_DEBUG("  >> Start CPU " << compLevelDataMap[currentState]->m_tmp_cpu << " VMem "
                                  << compLevelDataMap[currentState]->m_tmp_vmem << " Malloc "
@@ -775,9 +775,22 @@ void PerfMonMTSvc::aggregateSlotData() {
       } else {
         m_compLevelDataMap[it.first]->add2CallCount(it.second->getCallCount());
         m_compLevelDataMap[it.first]->add2DeltaCPU(it.second->getDeltaCPU());
+        m_compLevelDataMap[it.first]->add2DeltaWall(it.second->getDeltaWall());
         m_compLevelDataMap[it.first]->add2DeltaVmem(it.second->getDeltaVmem());
         m_compLevelDataMap[it.first]->add2DeltaMalloc(it.second->getDeltaMalloc());
       }
+      // Do a quick consistency check here and print any suspicious measurements.
+      // Timing measurements should always be positive definite
+      if(it.second->getDeltaCPU() < 0) {
+        ATH_MSG_WARNING("Negative CPU-time measurement of " << it.second->getDeltaCPU() <<
+                        " ms for component " << it.first.compName <<
+                        " in step " << it.first.stepName);
+      }
+      if(it.second->getDeltaWall() < 0) {
+        ATH_MSG_WARNING("Negative Wall-time measurement of " << it.second->getDeltaWall() <<
+                        " ms for component " << it.first.compName <<
+                        " in step " << it.first.stepName);
+      }
     }
   }
 }
diff --git a/Control/PerformanceMonitoring/PerfMonComps/src/PerfMonMTSvc.h b/Control/PerformanceMonitoring/PerfMonComps/src/PerfMonMTSvc.h
index e0c76d7503752981f1bbdebe03dfc715cf173f36..d3ac4b44d2319a33773ba87f4888f399dca33b45 100644
--- a/Control/PerformanceMonitoring/PerfMonComps/src/PerfMonMTSvc.h
+++ b/Control/PerformanceMonitoring/PerfMonComps/src/PerfMonMTSvc.h
@@ -171,9 +171,6 @@ class PerfMonMTSvc : virtual public IPerfMonMTSvc, virtual public IIncidentListe
   // Lock for capturing event loop measurements
   std::mutex m_mutex_capture;
 
-  // Locks for capturing component measurements
-  std::deque<std::mutex> m_mutex_slots;
-
   // Are we processing the first event?
   std::atomic<bool> m_isFirstEvent;