diff --git a/Simulation/G4Atlas/G4AtlasApps/share/jobOptions.G4AtlasMT.py b/Simulation/G4Atlas/G4AtlasApps/share/jobOptions.G4AtlasMT.py
new file mode 100644
index 0000000000000000000000000000000000000000..9ea47a66dc896a108408b85b73d764ff82e4e38b
--- /dev/null
+++ b/Simulation/G4Atlas/G4AtlasApps/share/jobOptions.G4AtlasMT.py
@@ -0,0 +1,106 @@
+#
+#  Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
+#
+
+#
+# Job options file for multi-threaded Geant4 ATLAS detector simulation.
+# Run this by specifying the number of threads on the command line:
+# $ athena --threads=4 G4AtlasApps/jobOptions.G4AtlasMT.py
+#
+
+from AthenaCommon import ExitCodes  # explicit import: no reliance on ambient names
+from AthenaCommon.Logging import log as msg
+from AthenaCommon.ConcurrencyFlags import jobproperties as jp
+nThreads = jp.ConcurrencyFlags.NumThreads()  # expected to come from --threads=N
+if nThreads < 1:
+    msg.fatal('numThreads must be >0. Did you set the --threads=N option?')
+    raise SystemExit(ExitCodes.CONFIGURATION_ERROR)  # same as sys.exit(), no 'sys' needed
+
+# Message stream format (svcMgr is presumably pre-defined by athena - confirm)
+msgFmt = "% F%40W%S%5W%e%s%7W%R%T %0W%M"
+svcMgr.MessageSvc.Format = msgFmt
+# svcMgr.MessageSvc.useColors = True
+# svcMgr.AthenaHiveEventLoopMgr.OutputLevel = DEBUG
+
+# Make the scheduler dump its control flow and data dependencies
+from AthenaCommon.AlgScheduler import AlgScheduler
+AlgScheduler.ShowControlFlow( True )
+AlgScheduler.ShowDataDependencies( True )
+
+# Thread pool service; G4ThreadInitTool is registered as its thread-init tool
+from GaudiHive.GaudiHiveConf import ThreadPoolSvc
+svcMgr += ThreadPoolSvc("ThreadPoolSvc")
+svcMgr.ThreadPoolSvc.ThreadInitTools = ["G4ThreadInitTool"]
+
+
+# AthenaCommon flags: hard-coded evgen input (AFS path), HITS output, event count
+from AthenaCommon.AthenaCommonFlags import athenaCommonFlags
+athenaCommonFlags.PoolEvgenInput = ['/afs/cern.ch/atlas/offline/ProdData/15.6.11.3/mu_E200_eta0-60-10000.evgen.pool.root']
+athenaCommonFlags.PoolHitsOutput = "g4hive.hits.pool.root"
+athenaCommonFlags.EvtMax = 500
+
+# Global conditions tag
+from AthenaCommon.GlobalFlags import jobproperties
+jobproperties.Global.ConditionsTag = "OFLCOND-RUN12-SDR-21"
+
+# Detector flags: ID, Calo, Muon and Truth switched on; Lucid switched off
+from AthenaCommon.DetFlags import DetFlags
+DetFlags.ID_setOn()
+DetFlags.Calo_setOn()
+DetFlags.Muon_setOn()
+DetFlags.Lucid_setOff()
+DetFlags.Truth_setOn()
+
+# Simulation flags (the ATLAS flag set is loaded before individual flags are set)
+from G4AtlasApps.SimFlags import simFlags
+simFlags.load_atlas_flags()
+# Use the default layout
+simFlags.SimLayout.set_On()
+# Switch off the event filter (the EtaPhi, VertexSpread and VertexRange checks)
+simFlags.EventFilter.set_Off()
+# Set the LAr parameterization (commented out: not overridden by this file)
+#simFlags.LArParameterization = 2
+# Magnetic field switched on
+simFlags.MagneticField.set_On()
+
+# Debug outputs of user actions (commented out; enable when debugging)
+#CfgGetter.getPublicTool('G4UA::AthenaTrackingActionTool').OutputLevel = DEBUG
+
+# Setup the algorithm sequence
+from AthenaCommon.AlgSequence import AlgSequence
+topSeq = AlgSequence()
+
+# SGInputLoader is a module in SGComps that will do a typeless StoreGate read
+# of data on disk, to preload it in the Whiteboard for other Algorithms to use.
+# It uses the same syntax as Algorithmic dependency declarations.
+from AthenaCommon import CfgMgr
+topSeq += CfgMgr.SGInputLoader(OutputLevel=INFO, ShowEventDump=False)
+topSeq.SGInputLoader.Load = [('McEventCollection', 'StoreGateSvc+GEN_EVENT')]
+
+# Add the beam effects algorithm
+from AthenaCommon.CfgGetter import getAlgorithm
+topSeq += getAlgorithm("BeamEffectsAlg", tryDefaultConfigurable=True)
+
+# Add the (python) G4 simulation service. This will kickstart a lot of
+# simulation setup (presumably incl. adding G4AtlasAlg/StreamHITS - confirm).
+from G4AtlasApps.PyG4Atlas import PyG4AtlasSvc
+svcMgr += PyG4AtlasSvc()
+
+# Explicitly specify the data-flow dependencies of G4AtlasAlg and StreamHITS,
+# because currently our VarHandles do not live in the algorithm but rather in
+# Geant4 components. NOTE(review): only SCT_Hits is declared as an output here.
+# TODO: make this declaration more automatic
+topSeq.G4AtlasAlg.ExtraInputs =  [('McEventCollection','StoreGateSvc+BeamTruthEvent')]
+topSeq.G4AtlasAlg.ExtraOutputs = [('SiHitCollection','StoreGateSvc+SCT_Hits')]
+topSeq.StreamHITS.ExtraInputs += topSeq.G4AtlasAlg.ExtraOutputs
+
+# Increase verbosity of the output stream
+#topSeq.StreamHITS.OutputLevel = DEBUG
+
+# Disable alg filtering - doesn't work yet in multi-threading
+topSeq.StreamHITS.AcceptAlgs = []
+
+# Override algorithm cloning settings: Cardinality = nThreads, except StreamHITS
+for alg in topSeq:
+    if alg.name() != 'StreamHITS':
+        alg.Cardinality = nThreads
diff --git a/Simulation/G4Atlas/G4AtlasTools/src/G4ThreadInitTool.cxx b/Simulation/G4Atlas/G4AtlasTools/src/G4ThreadInitTool.cxx
new file mode 100644
index 0000000000000000000000000000000000000000..ce2d101c3d03355fb8059e240c77c5dd3baf0380
--- /dev/null
+++ b/Simulation/G4Atlas/G4AtlasTools/src/G4ThreadInitTool.cxx
@@ -0,0 +1,119 @@
+/*
+  Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
+*/
+
+// Main header include
+#include "G4ThreadInitTool.h"
+
+// Geant4 includes
+#include "G4WorkerRunManager.hh"
+#include "G4UImanager.hh"
+#include "G4MTRunManager.hh"
+#include "G4WorkerThread.hh"
+#include "G4UserWorkerThreadInitialization.hh"
+#include "G4RunManager.hh"
+#include "G4VUserActionInitialization.hh"
+#include "G4UserWorkerInitialization.hh"
+#include "G4AutoDelete.hh"
+
+// System includes
+#include <unistd.h>
+#include <sys/syscall.h>
+
+//-----------------------------------------------------------------------------
+// Constructor: forwards to the base class; initialized-thread count starts at 0
+//-----------------------------------------------------------------------------
+G4ThreadInitTool::G4ThreadInitTool(
+    const std::string& type, const std::string& name, const IInterface* parent)
+  : base_class(type, name, parent)
+  , m_nInitThreads{0}
+{
+}
+
+//-----------------------------------------------------------------------------
+// Worker thread initialization (modeled on G4MTRunManagerKernel::StartThread).
+// If a required G4 object is missing, logs FATAL and leaves nInit() unchanged.
+//-----------------------------------------------------------------------------
+void G4ThreadInitTool::initThread()
+{
+  ATH_MSG_INFO("==> tbb thread started with id: 0x" <<
+               std::hex << pthread_self() << std::dec);
+
+  // Check the master-side objects first: report a missing setup, don't crash.
+  G4MTRunManager* masterRM = G4MTRunManager::GetMasterRunManager();
+  if(!masterRM) { ATH_MSG_FATAL("No master G4MTRunManager found"); return; }
+  const G4UserWorkerThreadInitialization* workerInitializer =
+    masterRM->GetUserWorkerThreadInitialization();
+  if(!workerInitializer) { ATH_MSG_FATAL("No worker initializer"); return; }
+
+  // Define the G4 worker thread context and setup its cleanup mechanism.
+  auto wThreadContext = new G4WorkerThread;
+  G4AutoDelete::Register(wThreadContext);
+
+  // Assign the thread ID from a counter shared by all calls (function static)
+  static std::atomic_uint tid(0);
+  wThreadContext->SetThreadId( tid++ );
+  G4Threading::G4SetThreadId( wThreadContext->GetThreadId() );
+  // Setup thread-local geometry and physics
+  wThreadContext->BuildGeometryAndPhysicsVector();
+
+  // Random number setup.
+  // TODO: revisit this once MT AthRNGSvc is available.
+  const CLHEP::HepRandomEngine* masterEngine = masterRM->getMasterRandomEngine();
+  workerInitializer->SetupRNGEngine(masterEngine);
+
+  // Create the thread-local worker run manager (G4AtlasWorkerRunManager)
+  ATH_MSG_INFO("Creating worker RM");
+  G4WorkerRunManager* wrm = workerInitializer->CreateWorkerRunManager();
+  if(!wrm) { ATH_MSG_FATAL("Worker run manager creation failed"); return; }
+  wrm->SetWorkerThread(wThreadContext);
+
+  // Share detector from master with worker.
+  ATH_MSG_INFO("Assigning detector construction");
+  const G4VUserDetectorConstruction* detector =
+    masterRM->GetUserDetectorConstruction();
+  // I don't want to const-cast here, but this is what they do in G4's
+  // StartThread function, so there is likely no alternative.
+  wrm->G4RunManager::SetUserInitialization
+    (const_cast<G4VUserDetectorConstruction*>(detector));
+  // Share physics list from master with worker.
+  const G4VUserPhysicsList* physicslist = masterRM->GetUserPhysicsList();
+  wrm->SetUserInitialization(const_cast<G4VUserPhysicsList*>(physicslist));
+
+  // Build thread-local user actions - NOT CURRENTLY USED.
+  if(masterRM->GetUserActionInitialization()) {
+    masterRM->GetNonConstUserActionInitialization()->Build();
+  }
+
+  // Start user worker initialization
+  if(masterRM->GetUserWorkerInitialization()) {
+    masterRM->GetUserWorkerInitialization()->WorkerStart();
+  }
+
+  // Initialize the worker run manager
+  ATH_MSG_INFO("Initializing worker RM");
+  wrm->Initialize();
+
+  // Copy the UI commands to the worker
+  std::vector<G4String> cmds = masterRM->GetCommandStack();
+  ATH_MSG_INFO (cmds.size() << " commands in UI stack");
+  G4UImanager* uimgr = G4UImanager::GetUIpointer();
+  for(const auto& it : cmds) {
+    ATH_MSG_INFO ("Adding command to worker: " << it);
+    uimgr->ApplyCommand(it);
+  }
+
+  // Count this thread as initialized (atomic increment); reported by nInit()
+  m_nInitThreads++;
+  ATH_MSG_INFO("==> tbb thread end of initThread with id: 0x" <<
+               std::hex << pthread_self() << std::dec);
+}
+
+//-----------------------------------------------------------------------------
+// Worker thread termination: only un-counts the thread, nothing else is undone
+//-----------------------------------------------------------------------------
+void G4ThreadInitTool::terminateThread()
+{
+  // One fewer initialized thread (atomic read-modify-write on the counter)
+  m_nInitThreads.fetch_sub(1);
+}
diff --git a/Simulation/G4Atlas/G4AtlasTools/src/G4ThreadInitTool.h b/Simulation/G4Atlas/G4AtlasTools/src/G4ThreadInitTool.h
new file mode 100644
index 0000000000000000000000000000000000000000..b1e2ce119f33c7e9dacbf79b39e9638fe5be1285
--- /dev/null
+++ b/Simulation/G4Atlas/G4AtlasTools/src/G4ThreadInitTool.h
@@ -0,0 +1,47 @@
+/*
+  Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
+*/
+
+#ifndef G4ATLASTOOLS_G4THREADINITTOOL_H
+#define G4ATLASTOOLS_G4THREADINITTOOL_H
+
+#include "AthenaBaseComps/AthAlgTool.h"
+#include "GaudiKernel/IThreadInitTool.h"
+
+#include <string>
+#include <atomic>
+
+/// @class G4ThreadInitTool
+/// @brief A tool which sets up the worker-thread-local workspace for Geant4.
+/// It also counts the threads it has initialized and reports that via nInit().
+/// @author Steve Farrell <Steven.Farrell@cern.ch>
+///
+class G4ThreadInitTool : virtual public extends1<AthAlgTool, IThreadInitTool>
+{
+
+  public:
+
+    /// Standard tool constructor (type, name, parent); counter starts at zero
+    G4ThreadInitTool(const std::string&, const std::string&, const IInterface*);
+
+    /// Set up the Geant4 workspace for this worker thread and count it
+    virtual void initThread() override final;
+
+    /// Thread termination hook: only decrements the initialized-thread counter
+    virtual void terminateThread() override final;
+
+    /// Counter used for barrier mechanism in thread initialization.
+    /// This number needs to be correctly reported to properly ensure
+    /// scheduling of thread-initialization tasks on every thread.
+    virtual unsigned int nInit() const override final {
+      return m_nInitThreads;
+    }
+
+  private:
+
+    /// Threads initialized so far: ++ in initThread, -- in terminateThread
+    std::atomic_uint m_nInitThreads;
+
+}; // class G4ThreadInitTool
+
+#endif // G4ATLASTOOLS_G4THREADINITTOOL_H
diff --git a/Simulation/G4Atlas/G4AtlasTools/src/components/G4AtlasTools_entries.cxx b/Simulation/G4Atlas/G4AtlasTools/src/components/G4AtlasTools_entries.cxx
index c74a873e541fabfa6934a543ceeb173fbb70bcbf..e19ec7de5c51c0d6da6693242afb860d37d4cf9f 100644
--- a/Simulation/G4Atlas/G4AtlasTools/src/components/G4AtlasTools_entries.cxx
+++ b/Simulation/G4Atlas/G4AtlasTools/src/components/G4AtlasTools_entries.cxx
@@ -14,6 +14,7 @@
 #include "../FastSimulationMasterTool.h"
 #include "G4AtlasTools/GlobalFieldManagerTool.h"
 #include "G4AtlasTools/DetectorFieldManagerTool.h"
+#include "../G4ThreadInitTool.h"
 
 DECLARE_TOOL_FACTORY( PhysicsListToolBase )
 DECLARE_TOOL_FACTORY( DetectorGeometryBase )
@@ -29,3 +30,4 @@ DECLARE_TOOL_FACTORY( FastSimulationMasterTool )
 DECLARE_TOOL_FACTORY( AddPhysicsDecayTool )
 DECLARE_TOOL_FACTORY( GlobalFieldManagerTool )
 DECLARE_TOOL_FACTORY( DetectorFieldManagerTool )
+DECLARE_TOOL_FACTORY( G4ThreadInitTool )