diff --git a/Event/EventBase/include/Event/Proxy.h b/Event/EventBase/include/Event/Proxy.h
index 8a71586e4e9b5281e0cbb9e1cab3f789605f41e3..6030aed45b96c9270ecde6a21dde91fa419db740 100644
--- a/Event/EventBase/include/Event/Proxy.h
+++ b/Event/EventBase/include/Event/Proxy.h
@@ -33,32 +33,57 @@ namespace LHCb::Event {
   /**
    * Proxy representing an N-Dimensional array of numerical data (ints or floats)
    */
-  template <typename Tag, SIMDWrapper::InstructionSet Simd, LHCb::Pr::ProxyBehaviour Behaviour, typename ContainerType,
-            typename... Ts>
+  template <typename Tag, SIMDWrapper::InstructionSet Simd, LHCb::Pr::ProxyBehaviour Behaviour,
+            typename ContainerTypeBase, typename... Ts>
   struct NDNumericProxy {
-    using simd_t     = SIMDWrapper::type_map_t<Simd>;
-    using type       = typename Tag::template vec_type<simd_t>;
-    using OffsetType = LHCb::Pr::detail::offset_t<Behaviour, Simd>;
+    using simd_t        = SIMDWrapper::type_map_t<Simd>;
+    using type          = typename Tag::template vec_type<simd_t>;
+    using OffsetType    = LHCb::Pr::detail::offset_t<Behaviour, Simd>;
+    using ContainerType = LHCb::Pr::detail::container_t<Behaviour, Simd, ContainerTypeBase>;
 
-    NDNumericProxy( ContainerType* container, OffsetType offset, Ts... Is )
+    NDNumericProxy( ContainerType container, OffsetType offset, Ts... Is )
         : m_container( container ), m_offset( offset ), m_indices( Is... ) {}
 
     inline __attribute__( ( always_inline ) ) constexpr auto get() const {
-      auto ptr = std::apply(
-          [&]( auto&&... args ) {
-            return m_container->template data<Tag>( std::forward<decltype( args )>( args )... );
-          },
-          m_indices );
-      if constexpr ( Behaviour == LHCb::Pr::ProxyBehaviour::ScatterGather ) {
-        return gather( ptr, m_offset );
-      } else if constexpr ( Behaviour == LHCb::Pr::ProxyBehaviour::ScalarFill ) {
-        return type{*std::next( ptr, m_offset )};
-      } else if constexpr ( Behaviour == LHCb::Pr::ProxyBehaviour::Contiguous ) {
-        return type{std::next( ptr, m_offset )};
+      if constexpr ( Behaviour == LHCb::Pr::ProxyBehaviour::ScatterGather2D ) {
+        auto const [i, j, mask] = m_offset;
+
+        std::array<int, simd_t::size> i_v;
+        i.store( i_v );
+
+        type out{0};
+        auto progressMask = mask;
+        int  first        = ffs( progressMask ) - 1;
+        while ( first >= 0 ) {
+          auto gatherMask = ( i == i_v[first] ) && mask;
+          auto base       = std::apply(
+              [&]( auto&&... args ) {
+                return m_container[i_v[first]]->template data<Tag>( std::forward<decltype( args )>( args )... );
+              },
+              m_indices );
+          out          = maskgather( base, j, gatherMask, out );
+          progressMask = progressMask && !gatherMask; // Update progress
+          first        = ffs( progressMask ) - 1;
+        }
+
+        return out;
       } else {
-        static_assert( Behaviour == LHCb::Pr::ProxyBehaviour::Compress,
-                       "New ProxyBehaviour was added but not implemented everywhere!" );
-        static_assert( Behaviour != LHCb::Pr::ProxyBehaviour::Compress, "Compress proxies are write-only." );
+        auto ptr = std::apply(
+            [&]( auto&&... args ) {
+              return m_container->template data<Tag>( std::forward<decltype( args )>( args )... );
+            },
+            m_indices );
+        if constexpr ( Behaviour == LHCb::Pr::ProxyBehaviour::ScatterGather ) {
+          return gather( ptr, m_offset );
+        } else if constexpr ( Behaviour == LHCb::Pr::ProxyBehaviour::ScalarFill ) {
+          return type{*std::next( ptr, m_offset )};
+        } else if constexpr ( Behaviour == LHCb::Pr::ProxyBehaviour::Contiguous ) {
+          return type{std::next( ptr, m_offset )};
+        } else {
+          static_assert( Behaviour == LHCb::Pr::ProxyBehaviour::Compress,
+                         "New ProxyBehaviour was added but not implemented everywhere!" );
+          static_assert( Behaviour != LHCb::Pr::ProxyBehaviour::Compress, "Compress proxies are write-only." );
+        }
       }
     }
 
@@ -84,15 +109,132 @@ namespace LHCb::Event {
     }
 
   private:
-    ContainerType*          m_container;
+    ContainerType           m_container;
     OffsetType              m_offset;
     const std::tuple<Ts...> m_indices;
   };
 
+  template <std::size_t... Ns>
+  struct array_field {
+    template <typename T>
+    [[nodiscard]] static constexpr std::size_t num_columns( T const& ) {
+      return ( 1 * ... * Ns );
+    }
+    template <typename T, typename... Ts>
+    [[nodiscard]] static constexpr std::size_t flat_index( T const&, Ts... Is ) {
+      assert( ( ... && ( static_cast<std::size_t>( Is ) < Ns ) ) );
+      std::size_t idx{0};
+      ( ( idx = idx * Ns + static_cast<std::size_t>( Is ) ), ... ); // the cast allows this to be used with enum classes
+      return idx;
+    }
+    template <typename Tag, SIMDWrapper::InstructionSet Simd, LHCb::Pr::ProxyBehaviour Behaviour,
+              typename ContainerType, typename... Ts>
+    using proxy_type = NDNumericProxy<Tag, Simd, Behaviour, ContainerType, Ts...>;
+  };
+  template <std::size_t... N>
+  struct ints_field : array_field<N...> {
+    using type = int;
+    template <typename simd_t>
+    using vec_type = SIMDWrapper::vector_type_map_t<type, simd_t>;
+  };
+  template <std::size_t... N>
+  struct floats_field : array_field<N...> {
+    using type = float;
+    template <typename simd_t>
+    using vec_type = SIMDWrapper::vector_type_map_t<type, simd_t>;
+  };
+
+  using int_field   = ints_field<>;
+  using float_field = floats_field<>;
+
+  /**
+   * ND Tag and proxy for representing a 2D index into multiple containers `base[i]->field[j]`
+   */
+  template <typename ChildContainerType, int jbits = 24, std::size_t... N>
+  struct indices2D_field : ints_field<N..., 1> {
+    template <typename Tag, SIMDWrapper::InstructionSet Simd, LHCb::Pr::ProxyBehaviour Behaviour,
+              typename ContainerTypeBase, typename... Ts>
+    struct NDIndex2DProxy {
+      using simd_t        = SIMDWrapper::type_map_t<Simd>;
+      using int_v         = typename Tag::template vec_type<simd_t>;
+      using OffsetType    = LHCb::Pr::detail::offset_t<Behaviour, Simd>;
+      using ContainerType = LHCb::Pr::detail::container_t<Behaviour, Simd, ContainerTypeBase>;
+      using proxy_type    = NDNumericProxy<Tag, Simd, Behaviour, ContainerTypeBase, Ts..., std::size_t>;
+
+      NDIndex2DProxy( ContainerType container, OffsetType offset, Ts... Is )
+          : m_container( container ), m_offset( offset ), m_indices( Is... ) {}
+
+      [[nodiscard]] constexpr auto proxy() const {
+        return std::apply(
+            [&]( auto&&... args ) {
+              return proxy_type{m_container, m_offset, std::forward<decltype( args )>( args )..., 0};
+            },
+            m_indices );
+      }
+
+      [[nodiscard]] inline __attribute__( ( always_inline ) ) constexpr auto index() const { return proxy().get(); }
+
+      inline __attribute__( ( always_inline ) ) constexpr void set( int_v i, int_v j ) {
+        proxy().set( ( i << jbits ) | j );
+      }
+
+      inline __attribute__( ( always_inline ) ) constexpr void set( int_v j ) { set( 0, j ); }
+
+      template <typename M = std::true_type>
+      constexpr auto get( M&& withmask = {} ) {
+        static_assert( Behaviour == LHCb::Pr::ProxyBehaviour::ScatterGather2D,
+                       "NDIndex2D is only supported by ScatterGather2D proxies." );
+
+        using proxy_type =
+            typename ChildContainerType::template proxy_type<Simd, LHCb::Pr::ProxyBehaviour::ScatterGather2D,
+                                                             ChildContainerType>;
+        std::array<ChildContainerType*, simd_t::size> base{nullptr};
+        int_v                                         k_{0};
+
+        int_v idx = index();
+        int_v i2  = idx >> jbits;
+        int_v j2  = idx & ( ( 1 << jbits ) - 1 );
+
+        std::array<int, simd_t::size> i2_v;
+        i2.store( i2_v );
+
+        auto [i1, j1, mask] = m_offset;
+        std::array<int, simd_t::size> i1_v;
+        i1.store( i1_v );
+
+        if constexpr ( !std::is_same_v<std::true_type, std::decay_t<M>> ) { mask = mask && withmask; }
+
+        auto progressMask = mask;
+        int  first        = ffs( progressMask ) - 1;
+        int  k            = 0;
+        while ( first >= 0 ) {
+          auto updateMask = ( i1_v[first] == i1 ) && ( i2_v[first] == i2 ) && mask;
+          k_              = select( updateMask, int_v{k}, k_ );
+          base[k++]       = m_container[i1_v[first]]->template containers<Tag>()[i2_v[first]];
+          progressMask    = progressMask && !updateMask;
+          first           = ffs( progressMask ) - 1;
+        }
+
+        return proxy_type{base, {k_, j2, mask}};
+      }
+
+    private:
+      ContainerType           m_container;
+      OffsetType              m_offset;
+      const std::tuple<Ts...> m_indices;
+    };
+
+    template <typename Tag, SIMDWrapper::InstructionSet Simd, LHCb::Pr::ProxyBehaviour Behaviour,
+              typename ContainerType, typename... Ts>
+    using proxy_type = NDIndex2DProxy<Tag, Simd, Behaviour, ContainerType, Ts...>;
+  };
+  template <typename ChildContainerType, int jbits = 24>
+  using index2D_field = indices2D_field<ChildContainerType, jbits>;
+
   /**
    * Proxy representing an SOACollection
    */
-  template <SIMDWrapper::InstructionSet Simd, LHCb::Pr::ProxyBehaviour Behaviour, typename ContainerType>
+  template <SIMDWrapper::InstructionSet Simd, LHCb::Pr::ProxyBehaviour Behaviour, typename ContainerTypeBase>
   struct Proxy {
     static_assert(
         Simd != SIMDWrapper::Best,
@@ -104,31 +246,32 @@ namespace LHCb::Event {
     using simd_t                                      = SIMDWrapper::type_map_t<Simd>;
     using mask_v                                      = typename simd_t::mask_v;
     using OffsetType                                  = LHCb::Pr::detail::offset_t<Behaviour, Simd>;
+    using ContainerType = LHCb::Pr::detail::container_t<Behaviour, Simd, ContainerTypeBase>;
 
     template <typename Tag>
-    using hasTag = typename ContainerType::template hasTag<Tag>;
+    using hasTag = typename ContainerTypeBase::template hasTag<Tag>;
 
-    Proxy( ContainerType* container, OffsetType offset ) : m_container( container ), m_offset( offset ) {}
+    Proxy( ContainerType container, OffsetType offset ) : m_container( container ), m_offset( offset ) {}
 
     // Make a const proxy from non-const proxy
-    template <typename T = ContainerType, typename std::enable_if_t<std::is_const_v<T>, int> = 0>
+    template <typename T = ContainerTypeBase, typename std::enable_if_t<std::is_const_v<T>, int> = 0>
     Proxy( Proxy<Simd, Behaviour, std::remove_const_t<T>> other )
-        : m_container{&other.container()}, m_offset{other.offset()} {}
+        : m_container{other.container()}, m_offset{other.offset()} {}
 
     template <typename Tag, typename... Ts>
     constexpr auto field( Ts... Is ) const {
-      using tag_proxy = typename Tag::template proxy_type<Tag, Simd, Behaviour, ContainerType, Ts...>;
+      using tag_proxy = typename Tag::template proxy_type<Tag, Simd, Behaviour, ContainerTypeBase, Ts...>;
       return tag_proxy{m_container, m_offset, Is...};
     }
 
     template <typename Tag, typename... Ts>
     constexpr auto get( Ts... Is ) const {
-      using tag_proxy = typename Tag::template proxy_type<Tag, Simd, Behaviour, ContainerType, Ts...>;
+      using tag_proxy = typename Tag::template proxy_type<Tag, Simd, Behaviour, ContainerTypeBase, Ts...>;
       return tag_proxy{m_container, m_offset, Is...}.get();
     }
 
-    constexpr ContainerType&       container() { return *m_container; }
-    constexpr ContainerType const& container() const { return *m_container; }
+    constexpr ContainerType&       container() { return m_container; }
+    constexpr ContainerType const& container() const { return m_container; }
 
     constexpr auto loop_mask() const {
       if constexpr ( Behaviour == LHCb::Pr::ProxyBehaviour::Contiguous ) {
@@ -139,6 +282,8 @@ namespace LHCb::Event {
         //      could imagine having a proxy that knows which of its elements
         //      are valid
         return simd_t::mask_true();
+      } else if constexpr ( Behaviour == LHCb::Pr::ProxyBehaviour::ScatterGather2D ) {
+        return std::get<2>( offset() );
       } else {
         static_assert( Behaviour == LHCb::Pr::ProxyBehaviour::Compress );
         // TODO what makes sense here?
@@ -168,8 +313,8 @@ namespace LHCb::Event {
     static constexpr mask_v mask_true() { return mask_v{true}; }
 
   private:
-    ContainerType* m_container;
-    OffsetType     m_offset;
+    ContainerType m_container;
+    OffsetType    m_offset;
   };
 
   namespace detail {
diff --git a/Event/EventBase/include/Event/SOACollection.h b/Event/EventBase/include/Event/SOACollection.h
index 3ec6e910f2f6ff0c85545adff62d24110e82e406..d0286f249e8d25f9f055678fe95a782e2d38b9fd 100644
--- a/Event/EventBase/include/Event/SOACollection.h
+++ b/Event/EventBase/include/Event/SOACollection.h
@@ -26,39 +26,6 @@
 // For example and documentation, see: Event/TrackEvent/Event/PrSeedTracks.h
 
 namespace LHCb::Event {
-  template <std::size_t... Ns>
-  struct array_field {
-    template <typename T>
-    [[nodiscard]] static constexpr std::size_t num_columns( T const& ) {
-      return ( 1 * ... * Ns );
-    }
-    template <typename T, typename... Ts>
-    [[nodiscard]] static constexpr std::size_t flat_index( T const&, Ts... Is ) {
-      assert( ( ... && ( static_cast<std::size_t>( Is ) < Ns ) ) );
-      std::size_t idx{0};
-      ( ( idx = idx * Ns + static_cast<std::size_t>( Is ) ), ... ); // the cast allows this to be used with enum classes
-      return idx;
-    }
-    template <typename Tag, SIMDWrapper::InstructionSet Simd, LHCb::Pr::ProxyBehaviour Behaviour,
-              typename ContainerType, typename... Ts>
-    using proxy_type = NDNumericProxy<Tag, Simd, Behaviour, ContainerType, Ts...>;
-  };
-  template <std::size_t... N>
-  struct ints_field : array_field<N...> {
-    using type = int;
-    template <typename simd_t>
-    using vec_type = SIMDWrapper::vector_type_map_t<type, simd_t>;
-  };
-  template <std::size_t... N>
-  struct floats_field : array_field<N...> {
-    using type = float;
-    template <typename simd_t>
-    using vec_type = SIMDWrapper::vector_type_map_t<type, simd_t>;
-  };
-
-  using int_field   = ints_field<>;
-  using float_field = floats_field<>;
-
   namespace detail {
     template <typename, typename, typename = void>
     struct has_vec_type : std::false_type {};
diff --git a/Event/EventBase/include/Event/SOAZip.h b/Event/EventBase/include/Event/SOAZip.h
index 466afdeef24587f8bfa90adaef6486fea86e48e8..77bdf7f82550e6ffc77e6e6c4f03f25abb4bbc9d 100644
--- a/Event/EventBase/include/Event/SOAZip.h
+++ b/Event/EventBase/include/Event/SOAZip.h
@@ -166,6 +166,30 @@ namespace LHCb::Event {
           },
           m_containers );
     }
+
+    auto gather2D( typename default_simd_t::int_v const& indices, typename default_simd_t::mask_v const& mask ) {
+      using int_v = typename default_simd_t::int_v;
+      return std::apply(
+          [&]( ContainerTypes*... args ) {
+            return zip_proxy_type<LHCb::Pr::ProxyBehaviour::ScatterGather2D>{
+                {std::array<ContainerTypes*, default_simd_t::size>{args},
+                 typename zip_proxy_type<LHCb::Pr::ProxyBehaviour::ScatterGather2D>::OffsetType{int_v{0}, indices,
+                                                                                                mask}}...};
+          },
+          m_containers );
+    }
+    auto gather2D( typename default_simd_t::int_v const& indices, typename default_simd_t::mask_v const& mask ) const {
+      using int_v = typename default_simd_t::int_v;
+      return std::apply(
+          [&]( ContainerTypes*... args ) {
+            return const_zip_proxy_type<LHCb::Pr::ProxyBehaviour::ScatterGather2D>{
+                {std::array<ContainerTypes*, default_simd_t::size>{args},
+                 typename const_zip_proxy_type<LHCb::Pr::ProxyBehaviour::ScatterGather2D>::OffsetType{int_v{0}, indices,
+                                                                                                      mask}}...};
+          },
+          m_containers );
+    }
+
     auto scalar_fill( typename zip_proxy_type<LHCb::Pr::ProxyBehaviour::ScalarFill>::OffsetType const& offset ) {
       return std::apply(
           [&]( auto&&... args ) {
diff --git a/Event/EventBase/include/Event/Zip.h b/Event/EventBase/include/Event/Zip.h
index 18c2f7d2de643253806e41a755573569f9a41d2c..649fe4bded39900af8408dac9244100b6a4abafe 100644
--- a/Event/EventBase/include/Event/Zip.h
+++ b/Event/EventBase/include/Event/Zip.h
@@ -35,19 +35,20 @@ namespace LHCb::Pr {
 
   /** This enum is used to tag the data-loading and data-writing behaviour of a
    *  proxy object.
-   *   -      Compress: write-only, proxy is initialised with an offset and a
-   *                    mask, setters generate compress-stores operations using
-   *                    the offset and mask.
-   *   -    Contiguous: getters read contiguously from the given scalar offset,
-   *                    setters write contiguously to the same offset.
-   *   -    ScalarFill: read-only, getters read a single value from the given
-   *                    scalar offset and return a SIMD vector filled with
-   *                    copies of this value.
-   *   - ScatterGather: proxy is initialised with a SIMD vector of offsets,
-   *                    getters gather from these offsets, setters scatter to
-   *                    them [the scatter is not implemented yet].
+   *   -        Compress: write-only, proxy is initialised with an offset and a
+   *                      mask, setters generate compress-stores operations using
+   *                      the offset and mask.
+   *   -      Contiguous: getters read contiguously from the given scalar offset,
+   *                      setters write contiguously to the same offset.
+   *   -      ScalarFill: read-only, getters read a single value from the given
+   *                      scalar offset and return a SIMD vector filled with
+   *                      copies of this value.
+   *   -   ScatterGather: proxy is initialised with a SIMD vector of offsets,
+   *                      getters gather from these offsets, setters scatter to
+   *                      them [the scatter is not implemented yet].
+   *   - ScatterGather2D: same as ScatterGather but supports multiple containers.
    */
-  enum struct ProxyBehaviour { Compress, Contiguous, ScalarFill, ScatterGather };
+  enum struct ProxyBehaviour { Compress, Contiguous, ScalarFill, ScatterGather, ScatterGather2D };
 } // namespace LHCb::Pr
 
 /** Helper macros for defining LHCb::Pr::Proxy specialisations
@@ -269,15 +270,28 @@ namespace LHCb::Pr::detail {
       Behaviour == ProxyBehaviour::Compress,
       // Compress
       std::tuple<int, typename SIMDWrapper::type_map_t<Backend>::mask_v>,
-      std::conditional_t<Behaviour == ProxyBehaviour::Contiguous || Behaviour == ProxyBehaviour::ScalarFill,
-                         // Contiguous or ScalarFill
-                         int,
-                         std::conditional_t<Behaviour == ProxyBehaviour::ScatterGather,
-                                            // ScatterGather
-                                            typename SIMDWrapper::type_map_t<Backend>::int_v,
-                                            // Someone added a new ProxyBehaviour and forgot to
-                                            // update offset_t
-                                            void>>>;
+      std::conditional_t<
+          Behaviour == ProxyBehaviour::Contiguous || Behaviour == ProxyBehaviour::ScalarFill,
+          // Contiguous or ScalarFill
+          int,
+          std::conditional_t<Behaviour == ProxyBehaviour::ScatterGather,
+                             // ScatterGather
+                             typename SIMDWrapper::type_map_t<Backend>::int_v,
+                             std::conditional_t<Behaviour == ProxyBehaviour::ScatterGather2D,
+                                                // ScatterGather2D
+                                                std::tuple<typename SIMDWrapper::type_map_t<Backend>::int_v,
+                                                           typename SIMDWrapper::type_map_t<Backend>::int_v,
+                                                           typename SIMDWrapper::type_map_t<Backend>::mask_v>,
+                                                // Someone added a new ProxyBehaviour and forgot to
+                                                // update offset_t
+                                                void>>>>;
+
+  template <ProxyBehaviour Behaviour, SIMDWrapper::InstructionSet Backend, typename ContainerType>
+  using container_t = std::conditional_t<Behaviour == ProxyBehaviour::ScatterGather2D,
+                                         // ScatterGather2D
+                                         std::array<ContainerType*, SIMDWrapper::type_map_t<Backend>::size>,
+                                         // others
+                                         ContainerType*>;
 
   // Make a new proxy type that inherits from all the component ones
   // The data structure looks something like:
diff --git a/Event/EventBase/tests/src/test_SOACollection.cpp b/Event/EventBase/tests/src/test_SOACollection.cpp
index 9c6a07064e9cb4a21ab110dbf46110db6ae27d53..1629d8fb2906db5d52ea2f0df609bb11232e2c0f 100644
--- a/Event/EventBase/tests/src/test_SOACollection.cpp
+++ b/Event/EventBase/tests/src/test_SOACollection.cpp
@@ -411,3 +411,102 @@ BOOST_AUTO_TEST_CASE( test_weird_container_2 ) {
   test.resize( test.capacity() + 37 );
   BOOST_CHECK_EQUAL( test.overaligned_byte( 0 ), 42 );
 }
+
+struct RecursiveTest;
+struct ChildIndex : index2D_field<RecursiveTest> {};
+struct ChildIndexArray : indices2D_field<RecursiveTest, 30, 5> {};
+struct ChildNumber : int_field {};
+
+struct RecursiveTest : public SOACollection<RecursiveTest, ChildIndex, ChildIndexArray, ChildNumber, Float> {
+
+  template <typename Tag>
+  auto containers() {
+    if constexpr ( std::is_same_v<Tag, ChildIndex> || std::is_same_v<Tag, ChildIndexArray> ) { return childContainers; }
+  }
+
+  std::vector<RecursiveTest*> childContainers;
+};
+
+template <typename Tag, typename TagSize, typename TagValue, typename Proxy, typename data_t, typename operator_t>
+data_t accumulate( Proxy proxy, data_t start, operator_t op ) {
+  using int_v  = typename Proxy::simd_t::int_v;
+  using mask_v = typename Proxy::simd_t::mask_v;
+  data_t acc   = start;
+  int_v  size  = proxy.template get<TagSize>();
+  mask_v mask  = ( 0 < size ) && proxy.loop_mask();
+  int    i     = 0;
+  while ( any( mask ) ) {
+    auto child = proxy.template field<Tag>( i ).get( mask );
+    acc        = op( acc, child.template get<TagValue>() );
+    mask       = ( ++i < size ) && mask;
+  }
+  return acc;
+}
+
+template <typename Tag, typename TagSize, typename TagValue, typename Proxy, typename data_t, typename operator_t>
+data_t accumulate_leaves( Proxy proxy, data_t start, operator_t op ) {
+  using int_v  = typename Proxy::simd_t::int_v;
+  using mask_v = typename Proxy::simd_t::mask_v;
+
+  int_v  size = proxy.template get<TagSize>();
+  data_t acc  = select( size == 0, proxy.template get<TagValue>(), start );
+
+  mask_v mask = ( 0 < size ) && proxy.loop_mask();
+  int    i    = 0;
+  while ( any( mask ) ) {
+    auto child = proxy.template field<Tag>( i ).get( mask );
+    acc        = op( acc, accumulate_leaves<Tag, TagSize, TagValue>( child, start, op ) );
+    mask       = ( ++i < size ) && mask;
+  }
+  return acc;
+}
+
+BOOST_AUTO_TEST_CASE( test_recursive ) {
+  RecursiveTest test1{};
+  RecursiveTest test2{};
+  RecursiveTest test3{};
+
+  test1.childContainers.push_back( &test2 );
+  test1.childContainers.push_back( &test3 );
+  test2.childContainers.push_back( &test1 );
+  test2.childContainers.push_back( &test3 );
+  test3.childContainers.push_back( &test1 );
+  test3.childContainers.push_back( &test2 );
+
+  for ( int i = 0; i < 16; i++ ) {
+    auto a = test1.emplace_back<SIMDWrapper::InstructionSet::Scalar>();
+    a.field<ChildIndex>().set( ( i / 4 ) % 2, i % 4 );
+    for ( int j = 0; j < 5; j++ ) a.field<ChildIndexArray>( j ).set( ( i / 4 ) % 2, ( i + j + 1 ) % 4 );
+    a.field<ChildNumber>().set( 2 );
+    a.field<Float>().set( 100.f + i );
+
+    auto b = test2.emplace_back<SIMDWrapper::InstructionSet::Scalar>();
+    b.field<ChildIndex>().set( ( i / 2 ) % 2, i % 4 );
+    for ( int j = 0; j < 5; j++ ) b.field<ChildIndexArray>( j ).set( ( i / 2 + 1 ) % 2, ( i + j + 1 ) % 4 );
+    b.field<ChildNumber>().set( 1 );
+    b.field<Float>().set( 200.f + i );
+
+    auto c = test3.emplace_back<SIMDWrapper::InstructionSet::Scalar>();
+    c.field<ChildIndex>().set( ( i / 2 ) % 2, i % 4 );
+    c.field<ChildNumber>().set( 0 );
+    c.field<Float>().set( 300.f + i );
+  }
+
+  auto test1_simd = test1.simd<SIMDWrapper::InstructionSet::Best>();
+  using simd      = SIMDWrapper::best::types;
+
+  auto proxy = test1_simd.gather2D( simd::indices(), simd::indices() < simd::size );
+
+  std::cout << "test1.float: " << proxy.get<Float>() << std::endl;
+  std::cout << "test1.child.float: " << proxy.get<ChildIndex>().get<Float>() << std::endl;
+  std::cout << "test1.child.child.float: " << proxy.get<ChildIndex>().get<ChildIndex>().get<Float>() << std::endl;
+
+  std::cout << "test1.child[0].float: " << proxy.get<ChildIndexArray>( 0 ).get<Float>() << std::endl;
+  std::cout << "test1.child[1].float: " << proxy.get<ChildIndexArray>( 1 ).get<Float>() << std::endl;
+
+  std::cout << "accumulate: "
+            << accumulate<ChildIndexArray, ChildNumber, Float>( proxy, simd::float_v{0.f}, std::plus<>{} ) << std::endl;
+  std::cout << "accumulate leaves: "
+            << accumulate_leaves<ChildIndexArray, ChildNumber, Float>( proxy, simd::float_v{0.f}, std::plus<>{} )
+            << std::endl;
+}
\ No newline at end of file
diff --git a/Event/PhysEvent/include/Event/Particle_v2.h b/Event/PhysEvent/include/Event/Particle_v2.h
index 92c4c0a0880b24afa466a13118ffa6fe1d8474a5..51921cf0b4dbc37e7191bb1df170cf7e36c50dbd 100644
--- a/Event/PhysEvent/include/Event/Particle_v2.h
+++ b/Event/PhysEvent/include/Event/Particle_v2.h
@@ -408,7 +408,7 @@ namespace LHCb::Event {
       }
       [[nodiscard]] auto descendant_unique_id( std::size_t n_child ) const {
         return UniqueIDGenerator::ID<typename simd_t::int_v>{
-            this->template get<CompositeTags::DescendantUniqueIDs>( n_child ), this->container().unique_id_gen_tag()};
+            this->template get<CompositeTags::DescendantUniqueIDs>( n_child ), this->container()->unique_id_gen_tag()};
       }
       [[nodiscard]] auto momCovElement( std::size_t i, std::size_t j ) const {
         return this->template get<CompositeTags::FourMomCov>( sym_store_idx( i, j ) );
@@ -416,7 +416,7 @@ namespace LHCb::Event {
       [[nodiscard]] auto momPosCovElement( std::size_t i, std::size_t j ) const {
         return this->template get<CompositeTags::PositionFourMomCov>( i, j );
       }
-      [[nodiscard]] auto numDescendants() const { return this->container().numDescendants(); }
+      [[nodiscard]] auto numDescendants() const { return this->container()->numDescendants(); }
       [[nodiscard]] auto posCovElement( std::size_t i, std::size_t j ) const {
         return this->template get<CompositeTags::PositionCov>( sym_store_idx( i, j ) );
       }
@@ -498,8 +498,8 @@ namespace LHCb::Event {
       }
       [[nodiscard]] auto mass() const { return sqrt( this->mass2() ); }
       // Accessors for container-level information
-      [[nodiscard]] auto numChildren() const { return this->container().numChildren(); }
-      [[nodiscard]] auto zipIdentifier() const { return this->container().zipIdentifier(); }
+      [[nodiscard]] auto numChildren() const { return this->container()->numChildren(); }
+      [[nodiscard]] auto zipIdentifier() const { return this->container()->zipIdentifier(); }
     };
     template <SIMDWrapper::InstructionSet simd, Pr::ProxyBehaviour behaviour, typename ContainerType>
     using proxy_type = Proxy<simd, behaviour, ContainerType>;
diff --git a/Event/TrackEvent/include/Event/PrTracksTag.h b/Event/TrackEvent/include/Event/PrTracksTag.h
index 744f2b449814549323dd5d99f3e6e38d1a7e300d..f568a56b50dc21130ff2f6924195af0b2dcfd086 100644
--- a/Event/TrackEvent/include/Event/PrTracksTag.h
+++ b/Event/TrackEvent/include/Event/PrTracksTag.h
@@ -56,14 +56,15 @@ namespace LHCb::Event {
   template <std::size_t... N>
   struct states_field : floats_field<N..., 6> {
     template <typename Tag, SIMDWrapper::InstructionSet Simd, LHCb::Pr::ProxyBehaviour Behaviour,
-              typename ContainerType, typename... Ts>
+              typename ContainerTypeBase, typename... Ts>
     struct NDStateProxy {
-      using simd_t     = SIMDWrapper::type_map_t<Simd>;
-      using type       = typename Tag::template vec_type<simd_t>;
-      using OffsetType = LHCb::Pr::detail::offset_t<Behaviour, Simd>;
-      using proxy_type = NDNumericProxy<Tag, Simd, Behaviour, ContainerType, Ts..., std::size_t>;
+      using simd_t        = SIMDWrapper::type_map_t<Simd>;
+      using type          = typename Tag::template vec_type<simd_t>;
+      using OffsetType    = LHCb::Pr::detail::offset_t<Behaviour, Simd>;
+      using ContainerType = LHCb::Pr::detail::container_t<Behaviour, Simd, ContainerTypeBase>;
+      using proxy_type    = NDNumericProxy<Tag, Simd, Behaviour, ContainerTypeBase, Ts..., std::size_t>;
 
-      NDStateProxy( ContainerType* container, OffsetType offset, Ts... Is )
+      NDStateProxy( ContainerType container, OffsetType offset, Ts... Is )
           : m_container( container ), m_offset( offset ), m_indices( Is... ) {}
 
       template <StateParameters::StateVector i>
@@ -134,7 +135,7 @@ namespace LHCb::Event {
       constexpr auto get() const { return *this; }
 
     private:
-      ContainerType*          m_container;
+      ContainerType           m_container;
       OffsetType              m_offset;
       const std::tuple<Ts...> m_indices;
     };
@@ -151,14 +152,15 @@ namespace LHCb::Event {
   template <std::size_t... N>
   struct pos_dirs_field : floats_field<N..., 5> {
     template <typename Tag, SIMDWrapper::InstructionSet Simd, LHCb::Pr::ProxyBehaviour Behaviour,
-              typename ContainerType, typename... Ts>
+              typename ContainerTypeBase, typename... Ts>
     struct NDStateProxy {
-      using simd_t     = SIMDWrapper::type_map_t<Simd>;
-      using type       = typename Tag::template vec_type<simd_t>;
-      using OffsetType = LHCb::Pr::detail::offset_t<Behaviour, Simd>;
-      using proxy_type = NDNumericProxy<Tag, Simd, Behaviour, ContainerType, Ts..., std::size_t>;
+      using simd_t        = SIMDWrapper::type_map_t<Simd>;
+      using type          = typename Tag::template vec_type<simd_t>;
+      using OffsetType    = LHCb::Pr::detail::offset_t<Behaviour, Simd>;
+      using ContainerType = LHCb::Pr::detail::container_t<Behaviour, Simd, ContainerTypeBase>;
+      using proxy_type    = NDNumericProxy<Tag, Simd, Behaviour, ContainerTypeBase, Ts..., std::size_t>;
 
-      NDStateProxy( ContainerType* container, OffsetType offset, Ts... Is )
+      NDStateProxy( ContainerType container, OffsetType offset, Ts... Is )
           : m_container( container ), m_offset( offset ), m_indices( Is... ) {}
 
       template <PosDirParameters::PosDirVector i>
@@ -221,7 +223,7 @@ namespace LHCb::Event {
       constexpr auto get() const { return *this; }
 
     private:
-      ContainerType*          m_container;
+      ContainerType           m_container;
       OffsetType              m_offset;
       const std::tuple<Ts...> m_indices;
     };
@@ -248,14 +250,15 @@ namespace LHCb::Event {
   template <std::size_t... N>
   struct state_covs_field : floats_field<N..., 15> {
     template <typename Tag, SIMDWrapper::InstructionSet Simd, LHCb::Pr::ProxyBehaviour Behaviour,
-              typename ContainerType, typename... Ts>
+              typename ContainerTypeBase, typename... Ts>
     struct StateCovProxy {
-      using simd_t     = SIMDWrapper::type_map_t<Simd>;
-      using type       = typename Tag::template vec_type<simd_t>;
-      using OffsetType = LHCb::Pr::detail::offset_t<Behaviour, Simd>;
-      using proxy_type = NDNumericProxy<Tag, Simd, Behaviour, ContainerType, Ts..., std::size_t>;
+      using simd_t        = SIMDWrapper::type_map_t<Simd>;
+      using type          = typename Tag::template vec_type<simd_t>;
+      using OffsetType    = LHCb::Pr::detail::offset_t<Behaviour, Simd>;
+      using ContainerType = LHCb::Pr::detail::container_t<Behaviour, Simd, ContainerTypeBase>;
+      using proxy_type    = NDNumericProxy<Tag, Simd, Behaviour, ContainerTypeBase, Ts..., std::size_t>;
 
-      StateCovProxy( ContainerType* container, OffsetType offset, Ts... Is )
+      StateCovProxy( ContainerType container, OffsetType offset, Ts... Is )
           : m_container( container ), m_offset( offset ), m_indices( Is... ) {}
 
       template <StateParameters::CovMatrix i>
@@ -365,7 +368,7 @@ namespace LHCb::Event {
       constexpr auto& get() const { return *this; }
 
     private:
-      ContainerType*          m_container;
+      ContainerType           m_container;
       OffsetType              m_offset;
       const std::tuple<Ts...> m_indices;
     };
@@ -380,14 +383,15 @@ namespace LHCb::Event {
   template <std::size_t... N>
   struct vec3s_field : floats_field<N..., 3> {
     template <typename Tag, SIMDWrapper::InstructionSet Simd, LHCb::Pr::ProxyBehaviour Behaviour,
-              typename ContainerType, typename... Ts>
+              typename ContainerTypeBase, typename... Ts>
     struct Vec3Proxy {
-      using simd_t     = SIMDWrapper::type_map_t<Simd>;
-      using type       = typename Tag::template vec_type<simd_t>;
-      using OffsetType = LHCb::Pr::detail::offset_t<Behaviour, Simd>;
-      using proxy_type = NDNumericProxy<Tag, Simd, Behaviour, ContainerType, Ts..., std::size_t>;
+      using simd_t        = SIMDWrapper::type_map_t<Simd>;
+      using type          = typename Tag::template vec_type<simd_t>;
+      using OffsetType    = LHCb::Pr::detail::offset_t<Behaviour, Simd>;
+      using ContainerType = LHCb::Pr::detail::container_t<Behaviour, Simd, ContainerTypeBase>;
+      using proxy_type    = NDNumericProxy<Tag, Simd, Behaviour, ContainerTypeBase, Ts..., std::size_t>;
 
-      Vec3Proxy( ContainerType* container, OffsetType offset, Ts... Is )
+      Vec3Proxy( ContainerType container, OffsetType offset, Ts... Is )
           : m_container( container ), m_offset( offset ), m_indices( Is... ) {}
 
       template <PosDirParameters::PosDirVector i>
@@ -426,7 +430,7 @@ namespace LHCb::Event {
       constexpr auto get() const { return *this; }
 
     private:
-      ContainerType*          m_container;
+      ContainerType           m_container;
       OffsetType              m_offset;
       const std::tuple<Ts...> m_indices;
     };
diff --git a/Kernel/LHCbMath/include/LHCbMath/SIMDWrapper.h b/Kernel/LHCbMath/include/LHCbMath/SIMDWrapper.h
index 1a63bd261f9c249bca4e58994871dea1d0581871..f2475330dcb1fb52b00c5b092771775516b7ec3b 100644
--- a/Kernel/LHCbMath/include/LHCbMath/SIMDWrapper.h
+++ b/Kernel/LHCbMath/include/LHCbMath/SIMDWrapper.h
@@ -220,6 +220,7 @@ namespace SIMDWrapper {
       constexpr friend bool none( mask_v mask ) { return mask.data == 0; }
       constexpr friend bool any( mask_v mask ) { return mask.data != 0; }
       constexpr friend bool testbit( mask_v mask, int ) { return mask.data == 1; }
+      constexpr friend int  ffs( mask_v mask ) { return mask.data; }
 
       friend std::ostream& operator<<( std::ostream& os, mask_v mask ) { return os << "scalar{" << mask.data << "}"; }
 
@@ -520,6 +521,8 @@ namespace SIMDWrapper {
       friend bool none( mask_v mask ) { return movemask_u32( mask.native() ) == 0x0; }
       friend bool any( mask_v mask ) { return movemask_u32( mask.native() ) != 0x0; }
       friend bool testbit( mask_v mask, int bit ) { return ( movemask_u32( mask.native() ) & ( 1 << bit ) ) != 0; }
+      // TODO: c++20 countl_zero
+      friend int ffs( mask_v mask ) { return __builtin_ffs( movemask_u32( mask.native() ) ); }
 
       friend std::ostream& operator<<( std::ostream& os, mask_v x ) {
         return print_bitset<4>( os, static_cast<unsigned long long>( movemask_u32( x.native() ) ), "neon" );
@@ -902,6 +905,8 @@ namespace SIMDWrapper {
       friend bool none( mask_v mask ) { return _mm_movemask_ps( mask.native() ) == 0x0; }
       friend bool any( mask_v mask ) { return _mm_movemask_ps( mask.native() ) != 0x0; }
       friend bool testbit( mask_v mask, int bit ) { return ( _mm_movemask_ps( mask.native() ) & ( 1 << bit ) ) != 0; }
+      // TODO: c++20 countl_zero
+      friend int ffs( mask_v mask ) { return __builtin_ffs( _mm_movemask_ps( mask.native() ) ); }
 
       friend std::ostream& operator<<( std::ostream& os, mask_v x ) {
         return print_bitset<4>( os, static_cast<unsigned long long>( _mm_movemask_ps( x.native() ) ), "sse" );
@@ -1345,6 +1350,8 @@ namespace SIMDWrapper {
       friend bool testbit( mask_v mask, const int bit ) {
         return ( _mm256_movemask_ps( mask.native() ) & ( 1 << bit ) ) != 0;
       }
+      // TODO: c++20 countl_zero
+      friend int ffs( mask_v mask ) { return __builtin_ffs( _mm256_movemask_ps( mask.native() ) ); }
 
       __m256 native() const { return data; }
 
@@ -1694,6 +1701,8 @@ namespace SIMDWrapper {
       friend bool none( mask_v mask ) { return mask.native() == 0x00; }
       friend bool any( mask_v mask ) { return mask.native() != 0x00; }
       friend bool testbit( mask_v mask, int bit ) { return ( mask.native() & ( 1 << bit ) ) != 0; }
+      // TODO: c++20 countl_zero
+      friend int ffs( mask_v mask ) { return __builtin_ffs( mask.native() ); }
 
       friend std::ostream& operator<<( std::ostream& os, mask_v x ) {
         return print_bitset<8>( os, x.native(), "avx256" );
@@ -2013,6 +2022,8 @@ namespace SIMDWrapper {
       friend bool none( mask_v mask ) { return mask.native() == 0x0000; }
       friend bool any( mask_v mask ) { return mask.native() != 0x0000; }
       friend bool testbit( mask_v mask, int bit ) { return ( mask.native() & ( 1 << bit ) ) != 0; }
+      // TODO: c++20 countl_zero
+      friend int ffs( mask_v mask ) { return __builtin_ffs( mask.native() ); }
 
       friend std::ostream& operator<<( std::ostream& os, mask_v x ) {
         return print_bitset<16>( os, x.native(), "avx512" );