Merge branch 'GSFConvolution_KLmerges_array' into 'master'

GSF : Use std::array with alignas, instead of AlignedDynArray. See merge request atlas/athena!37807

Merge branch 'GSFConvolution_KLmerges_array' into 'master'
GSF : Use std::array with alignas, instead of AlignedDynArray. See merge request atlas/athena!37807
76199ff0 · Edward Moyse · e3b5433a · af289d47 · 76199ff0 · 76199ff0
Commit 76199ff0 authored 4 years ago by Edward Moyse
--- a/Tracking/TrkFitter/TrkGaussianSumFilter/TrkGaussianSumFilter/GsfConstants.h
+++ b/Tracking/TrkFitter/TrkGaussianSumFilter/TrkGaussianSumFilter/GsfConstants.h
@@ -11,6 +11,7 @@
 #define GSFCONSTANTS_H

 #include <cstdint>
+#include <cstddef>
 namespace GSFConstants {

 /**
@@ -30,8 +31,8 @@ namespace GSFConstants {
 * The max numbers for N , M  are enforced in configuration.
 * It is an error to configure for more.
 *
- * They lead to a max allowed NXM after convolution
- * trying to somehow pass (by-passing config)
+ * They lead to a max allowed NXM after convolution.
+ * Trying to somehow pass (by-passing the config)
 * leads to an exception.
 *
 * Furthermore, the  number of coefficients is also
@@ -42,8 +43,8 @@ namespace GSFConstants {
 */

 /// Maximum number of Gaussian components for the
-/// state description (default is 12)
-constexpr int8_t maxNumberofStateComponents = 14;
+/// state description.
+constexpr int8_t maxNumberofStateComponents = 12;
 /// Maximum number of Gaussian components for the
 /// Bethe Heitler description
 constexpr int8_t maxNumberofBHComponents = 6;
@@ -52,12 +53,16 @@ constexpr int8_t polynomialCoefficients = 6;

 /**
 * The maximum size State x Bethe-Heitler components
- * The typical number we use is 6x12 = 72.
- * Max here is 6x14 = 84. As in literature there are examples
- * up tio 14 state components
+ * The typical number we use is the max 6x12 = 72 i.e as
+ * we try to have the maximum practical precision for the GSF.
 */
 constexpr int8_t maxComponentsAfterConvolution =
  maxNumberofBHComponents * maxNumberofStateComponents;

+/**
+ * @brief Alignment used  for SIMD operations
+ * internally to GSF.
+ */
+constexpr size_t alignment = 32;
 }
 #endif
--- a/Tracking/TrkFitter/TrkGaussianSumFilter/TrkGaussianSumFilter/IMultiStateMaterialEffects.h
+++ b/Tracking/TrkFitter/TrkGaussianSumFilter/TrkGaussianSumFilter/IMultiStateMaterialEffects.h
@@ -40,10 +40,10 @@ public:
  {
    std::array<double, GSFConstants::maxNumberofBHComponents> weights = {};
    std::array<double, GSFConstants::maxNumberofBHComponents> deltaPs = {};
-    alignas(32)
+    alignas(GSFConstants::alignment)
      std::array<AmgVector(5),
                 GSFConstants::maxNumberofBHComponents> deltaParameters = {};
-    alignas(32)
+    alignas(GSFConstants::alignment)
      std::array<AmgSymMatrix(5),
                 GSFConstants::maxNumberofBHComponents> deltaCovariances = {};


--- a/Tracking/TrkFitter/TrkGaussianSumFilter/TrkGaussianSumFilter/KLGaussianMixtureReduction.h
+++ b/Tracking/TrkFitter/TrkGaussianSumFilter/TrkGaussianSumFilter/KLGaussianMixtureReduction.h
@@ -74,17 +74,14 @@
 #define KLGaussianMixReductionUtils_H

 #include "CxxUtils/features.h"
+#include "TrkGaussianSumFilter/GsfConstants.h"
+#include <array>
+#include <vector>
 #include <cstdint>
 #include <utility>
-#include <vector>

 namespace GSFUtils {

-/**
- * @brief Alignment used  for SIMD
- */
-constexpr size_t alignment = 32;
-
 /**
 * @brief struct representing 1D component
 * Negative weight means invalidated component
@@ -97,6 +94,19 @@ struct Component1D
  double weight = 0.;
 };

+/**
+ * @brief struct representing an array of 1D component.
+ * with the maximum size we can have after convolution
+ * with material effects.
+ */
+struct Component1DArray
+{
+  alignas(GSFConstants::alignment)
+    std::array<Component1D,
+               GSFConstants::maxComponentsAfterConvolution> components{};
+  int32_t numComponents = 0;
+};
+
 /**
 * @brief Merge the componentsIn and return
 * which componets got merged
@@ -106,13 +116,11 @@ struct Component1D
 * will cause a runtime exception
 *
 * Furthemore, the input component array is assumed to be
- * GSFUtils::alignment aligned.
+ * GSFConstants::alignment aligned.
 * Can be created via the AlignedDynArray.
 */
 std::vector<std::pair<int8_t, int8_t>>
-findMerges(Component1D* componentsIn,
-           const int8_t inputSize,
-           const int8_t reducedSize);
+findMerges(Component1DArray& componentsIn, const int8_t reducedSize);

 /**
 * @brief For finding the index of the minumum pairwise distance

--- a/Tracking/TrkFitter/TrkGaussianSumFilter/src/GsfMaterialMixtureConvolution.cxx
+++ b/Tracking/TrkFitter/TrkGaussianSumFilter/src/GsfMaterialMixtureConvolution.cxx
@@ -244,7 +244,7 @@ Trk::GsfMaterialMixtureConvolution::update(
    // check vectors have the same size
    if (caches[i].numWeights != caches[i].numDeltaPs) {
      ATH_MSG_WARNING("Inconsistent number of components in the updator!!! no "
-                    "material effect will be applied");
+                      "material effect will be applied");
      caches[i].resetAndAddDummyValues();
    }

@@ -276,14 +276,14 @@ Trk::GsfMaterialMixtureConvolution::update(
  }

  // Fill information for to calculate which components to merge
-  // Inaddition scan all components for covariance matrices. If one or more
+  // In addition scan all components for covariance matrices. If one or more
  // component is missing an error matrix, component reduction is impossible.
  bool componentWithoutMeasurement = false;

-  GSFUtils::AlignedDynArray<GSFUtils::Component1D, GSFUtils::alignment>
-    components(n);
-  size_t k(0);
+  GSFUtils::Component1DArray componentsArray;
+  componentsArray.numComponents = n;
  std::vector<std::pair<size_t, size_t>> indices(n);
+  size_t k(0);
  for (size_t i(0); i < inputState.size(); ++i) {
    for (size_t j(0); j < caches[i].numWeights; ++j) {
      const AmgSymMatrix(5)* measuredCov = inputState[i].first->covariance();
@@ -295,21 +295,22 @@ Trk::GsfMaterialMixtureConvolution::update(
        componentWithoutMeasurement = true;
      }

-      components[k].mean = caches[i].deltaParameters[j][Trk::qOverP];
-      components[k].cov = cov;
-      components[k].invCov = cov > 0 ? 1. / cov : 1e10;
-      components[k].weight = caches[i].weights[j];
+      componentsArray.components[k].mean =
+        caches[i].deltaParameters[j][Trk::qOverP];
+      componentsArray.components[k].cov = cov;
+      componentsArray.components[k].invCov = cov > 0 ? 1. / cov : 1e10;
+      componentsArray.components[k].weight = caches[i].weights[j];
      indices[k] = { i, j };
      ++k;
    }
  }

  if (componentWithoutMeasurement) {
-    auto *result = std::max_element(
-      components.begin(), components.end(), [](const auto& a, const auto& b) {
-        return a.weight < b.weight;
-      });
-    auto index = std::distance(components.begin(), result);
+    auto* result = std::max_element(
+      componentsArray.components.data(),
+      componentsArray.components.data() + componentsArray.numComponents,
+      [](const auto& a, const auto& b) { return a.weight < b.weight; });
+    auto index = std::distance(componentsArray.components.data(), result);

    // Build the first TP
    size_t stateIndex = indices[index].first;
@@ -342,9 +343,9 @@ Trk::GsfMaterialMixtureConvolution::update(

  // Gather the merges -- order is important -- RHS is smaller than LHS
  std::vector<std::pair<int8_t, int8_t>> merges;
-  if (n > m_maximumNumberOfComponents)
-    merges = findMerges(components.buffer(), n, m_maximumNumberOfComponents);
-
+  if (n > m_maximumNumberOfComponents) {
+    merges = findMerges(componentsArray, m_maximumNumberOfComponents);
+  }
  // Merge components
  MultiComponentStateAssembler::Cache assemblerCache;
  int nMerges(0);

--- a/Tracking/TrkFitter/TrkGaussianSumFilter/src/KLGaussianMixtureReduction.cxx
+++ b/Tracking/TrkFitter/TrkGaussianSumFilter/src/KLGaussianMixtureReduction.cxx
@@ -9,6 +9,7 @@
 #include "TrkGaussianSumFilter/GsfConstants.h"
 #include <limits>
 #include <stdexcept>
+#include <vector>
 #if !defined(__GNUC__)
 #define __builtin_assume_aligned(X, N) X
 #else
@@ -124,10 +125,10 @@ recalculateDistances(const Component1D* componentsIn,
                     const int32_t n)
 {
  const Component1D* components = static_cast<const Component1D*>(
-    __builtin_assume_aligned(componentsIn, alignment));
+    __builtin_assume_aligned(componentsIn, GSFConstants::alignment));

-  float* distances =
-    static_cast<float*>(__builtin_assume_aligned(distancesIn, alignment));
+  float* distances = static_cast<float*>(
+    __builtin_assume_aligned(distancesIn, GSFConstants::alignment));

  const int32_t j = mini;
  const int32_t indexConst = (j - 1) * j / 2;
@@ -167,9 +168,9 @@ calculateAllDistances(const Component1D* componentsIn,
 {

  const Component1D* components = static_cast<const Component1D*>(
-    __builtin_assume_aligned(componentsIn, alignment));
-  float* distances =
-    static_cast<float*>(__builtin_assume_aligned(distancesIn, alignment));
+    __builtin_assume_aligned(componentsIn, GSFConstants::alignment));
+  float* distances = static_cast<float*>(
+    __builtin_assume_aligned(distancesIn, GSFConstants::alignment));

  for (int32_t i = 1; i < n; ++i) {
    const int32_t indexConst = (i - 1) * i / 2;
@@ -187,8 +188,8 @@ calculateAllDistances(const Component1D* componentsIn,
 void
 resetDistances(float* distancesIn, const int32_t minj, const int32_t n)
 {
-  float* distances =
-    static_cast<float*>(__builtin_assume_aligned(distancesIn, alignment));
+  float* distances = static_cast<float*>(
+    __builtin_assume_aligned(distancesIn, GSFConstants::alignment));
  const int32_t j = minj;
  const int32_t indexConst = (j - 1) * j / 2;
  // Rows
@@ -210,33 +211,31 @@ namespace GSFUtils {
 * which componets got merged.
 */
 std::vector<std::pair<int8_t, int8_t>>
-findMerges(Component1D* componentsIn,
-           const int8_t inputSize,
-           const int8_t reducedSize)
+findMerges(Component1DArray& componentsIn, const int8_t reducedSize)
 {
  Component1D* components = static_cast<Component1D*>(
-    __builtin_assume_aligned(componentsIn, alignment));
+    __builtin_assume_aligned(componentsIn.components.data(), GSFConstants::alignment));
+
+  const int32_t n = componentsIn.numComponents;

  // Sanity check. Function  throw on invalid inputs
-  if (inputSize < 0 ||
-      inputSize > GSFConstants::maxComponentsAfterConvolution ||
-      reducedSize > inputSize) {
+  if (n < 0 || n > GSFConstants::maxComponentsAfterConvolution ||
+      reducedSize > n) {
    throw std::runtime_error("findMerges :Invalid InputSize or reducedSize");
  }
  // We need just one for the full duration of a job
  const static std::vector<triangularToIJ> convert = createToIJMaxRowCols();

-  // Based on the inputSize allocate enough space for the pairwise distances
-  const int8_t n = inputSize;
+  // Based on the inputSize n allocate enough space for the pairwise distances
  const int32_t nn = n * (n - 1) / 2;
  // We work with a  multiple of 8*floats (32 bytes).
  const int32_t nn2 = (nn & 7) == 0 ? nn : nn + (8 - (nn & 7));
-  AlignedDynArray<float, alignment> distances(
+  AlignedDynArray<float, GSFConstants::alignment> distances(
    nn2, std::numeric_limits<float>::max());

  // vector to be returned
  std::vector<std::pair<int8_t, int8_t>> merges;
-  merges.reserve(inputSize - reducedSize);
+  merges.reserve(n - reducedSize);
  // initial distance calculation
  calculateAllDistances(components, distances.buffer(), n);

@@ -291,7 +290,8 @@ int32_t
 findMinimumIndex(const float* distancesIn, const int n)
 {
  using namespace CxxUtils;
-  float* array = (float*)__builtin_assume_aligned(distancesIn, alignment);
+  float* array =
+    (float*)__builtin_assume_aligned(distancesIn, GSFConstants::alignment);
  const vec<int, 8> increment = { 8, 8, 8, 8, 8, 8, 8, 8 };
  vec<int, 8> indicesIn = { 0, 1, 2, 3, 4, 5, 6, 7 };
  vec<int, 8> minindices = indicesIn;
@@ -326,7 +326,8 @@ int32_t
 findMinimumIndex(const float* distancesIn, const int n)
 {
  using namespace CxxUtils;
-  float* array = (float*)__builtin_assume_aligned(distancesIn, alignment);
+  float* array =
+    (float*)__builtin_assume_aligned(distancesIn, GSFConstants::alignment);
  // Do 2 vectors of 4 elements , so 8 at time
  const vec<int, 4> increment = { 8, 8, 8, 8 };
  vec<int, 4> indices1 = { 0, 1, 2, 3 };
@@ -379,7 +380,8 @@ int32_t
 findMinimumIndex(const float* distancesIn, const int n)
 {
  using namespace CxxUtils;
-  float* array = (float*)__builtin_assume_aligned(distancesIn, alignment);
+  float* array =
+    (float*)__builtin_assume_aligned(distancesIn, GSFConstants::alignment);
  const vec<int, 4> increment = { 8, 8, 8, 8 };
  vec<int, 4> indices1 = { 0, 1, 2, 3 };
  vec<int, 4> indices2 = { 4, 5, 6, 7 };

--- a/Tracking/TrkFitter/TrkGaussianSumFilter/src/QuickCloseComponentsMultiStateMerger.cxx
+++ b/Tracking/TrkFitter/TrkGaussianSumFilter/src/QuickCloseComponentsMultiStateMerger.cxx
@@ -27,23 +27,24 @@ mergeFullDistArray(Trk::MultiComponentStateAssembler::Cache& cache,
                   Trk::MultiComponentState& statesToMerge,
                   const unsigned int maximumNumberOfComponents)
 {
+  Component1DArray componentsArray;
  const int32_t n = statesToMerge.size();
-  AlignedDynArray<Component1D, alignment> components(n);
+  componentsArray.numComponents = n;
  for (int32_t i = 0; i < n; ++i) {
    const AmgSymMatrix(5)* measuredCov = statesToMerge[i].first->covariance();
    const AmgVector(5)& parameters = statesToMerge[i].first->parameters();
    // Fill in infomation
    const double cov =
      measuredCov ? (*measuredCov)(Trk::qOverP, Trk::qOverP) : -1.;
-    components[i].mean = parameters[Trk::qOverP];
-    components[i].cov = cov;
-    components[i].invCov = cov > 0 ? 1. / cov : 1e10;
-    components[i].weight = statesToMerge[i].second;
+    componentsArray.components[i].mean = parameters[Trk::qOverP];
+    componentsArray.components[i].cov = cov;
+    componentsArray.components[i].invCov = cov > 0 ? 1. / cov : 1e10;
+    componentsArray.components[i].weight = statesToMerge[i].second;
  }

  // Gather the merges
  const std::vector<std::pair<int8_t, int8_t>> merges =
-    findMerges(components.buffer(), n, maximumNumberOfComponents);
+    findMerges(componentsArray, maximumNumberOfComponents);

  // Do the full 5D calculations of the merge
  for (const auto& mergePair : merges) {

--- a/Tracking/TrkFitter/TrkGaussianSumFilter/test/testMergeComponents.cxx
+++ b/Tracking/TrkFitter/TrkGaussianSumFilter/test/testMergeComponents.cxx
@@ -89,16 +89,17 @@ constexpr std::array<Component1D, n> input = {
 int
 main()
 {
-  AlignedDynArray<Component1D, alignment> components(n);
+  GSFUtils::Component1DArray componentsArray;
+  componentsArray.numComponents = n;
  // Create an array of all components to be merged
  for (int8_t i = 0; i < n; ++i) {
-    components[i].mean = input[i].mean;
-    components[i].cov = input[i].cov;
-    components[i].invCov = input[i].invCov;
-    components[i].weight = input[i].weight;
+    componentsArray.components[i].mean = input[i].mean;
+    componentsArray.components[i].cov = input[i].cov;
+    componentsArray.components[i].invCov = input[i].invCov;
+    componentsArray.components[i].weight = input[i].weight;
  }
  std::vector<std::pair<int8_t, int8_t>> mergeOrder =
-    findMerges(components.buffer(), n, 12);
+    findMerges(componentsArray, 12);
  for (const auto& i : mergeOrder) {
    std::cout << "[" << static_cast<int>(i.first) << ", "
              << static_cast<int>(i.second) << "]" << '\n';