From 3ab73357a93a0d2881871050d08c285dd763c6f0 Mon Sep 17 00:00:00 2001
From: scott snyder <snyder@bnl.gov>
Date: Wed, 8 Jul 2020 16:24:56 +0200
Subject: [PATCH] StoreGate: Fix potential deadlock between SGImplSvc and
 DataProxy

The SGImplSvc::proxy methods take out a lock on the store, fetch
the proxy, then call isValid() on the proxy.  DataProxy::isValid()
will acquire a lock on the proxy and than can call back to the store
via TransientAddress::contextFromStore().  Thus, we can get a deadlock
if SGImplSvc::proxy() is called in one thread and DataProxy::isValid()
in another.

Resolve by releasing the SGImplSvc lock in proxy() before calling
DataProxy::isValid().
---
 Control/StoreGate/src/SGImplSvc.cxx | 26 ++++++++++++++++++--------
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/Control/StoreGate/src/SGImplSvc.cxx b/Control/StoreGate/src/SGImplSvc.cxx
index 502702c18f91..d950f8257e0f 100644
--- a/Control/StoreGate/src/SGImplSvc.cxx
+++ b/Control/StoreGate/src/SGImplSvc.cxx
@@ -796,12 +796,17 @@ SGImplSvc::proxy(const CLID& id) const
 DataProxy* 
 SGImplSvc::proxy(const CLID& id, bool checkValid) const
 { 
-  lock_t lock (m_mutex);
-  DataProxy* dp = m_pStore->proxy(id);
-  if (0 == dp && 0 != m_pPPS) {
-    dp = m_pPPS->retrieveProxy(id, string("DEFAULT"), *m_pStore);
+  DataProxy* dp = nullptr;
+  {
+    lock_t lock (m_mutex);
+    dp = m_pStore->proxy(id);
+    if (0 == dp && 0 != m_pPPS) {
+      dp = m_pPPS->retrieveProxy(id, string("DEFAULT"), *m_pStore);
+    }
   }
   /// Check if it is valid
+  // Be sure to release the lock before this.
+  // isValid() may call back to the store, so we could otherwise deadlock..
   if (checkValid && 0 != dp) {
     // FIXME: For keyless retrieve, this checks only the first instance
     // of the CLID in store. If that happens to be invalid, but the second
@@ -820,11 +825,16 @@ SGImplSvc::proxy(const CLID& id, const string& key) const
 DataProxy*
 SGImplSvc::proxy(const CLID& id, const string& key, bool checkValid) const
 { 
-  lock_t lock (m_mutex);
-  DataProxy* dp = m_pStore->proxy(id, key);
-  if (0 == dp && 0 != m_pPPS) {
-    dp = m_pPPS->retrieveProxy(id, key, *m_pStore);
+  DataProxy* dp = nullptr;
+  {
+    lock_t lock (m_mutex);
+    dp = m_pStore->proxy(id, key);
+    if (0 == dp && 0 != m_pPPS) {
+      dp = m_pPPS->retrieveProxy(id, key, *m_pStore);
+    }
   }
+  // Be sure to release the lock before this.
+  // isValid() may call back to the store, so we could otherwise deadlock..
   if (checkValid && 0 != dp && !(dp->isValid())) dp = 0;
   return dp;
 }
-- 
GitLab