diff --git a/Tools/WorkflowTestRunner/python/References.py b/Tools/WorkflowTestRunner/python/References.py index e34f95a51cf328bed556a4fb93f17dbe9385a77b..92d03b566e6c61ab35bfcb13d0977cce0d9b268f 100644 --- a/Tools/WorkflowTestRunner/python/References.py +++ b/Tools/WorkflowTestRunner/python/References.py @@ -24,9 +24,9 @@ references_map = { "d1759": "v10", # Reco "q442": "v41", - "q443": "v30", + "q443": "v31", "q445": "v53", - "q449": "v56", + "q449": "v57", # Derivations "data_PHYS_Run2": "v15", "data_PHYS_Run3": "v14", diff --git a/Tracking/TrkFitter/TrkGaussianSumFilterUtils/CMakeLists.txt b/Tracking/TrkFitter/TrkGaussianSumFilterUtils/CMakeLists.txt index 44caf19e9895e7deda3e230814f5eedd80143046..a5c02dfe53fdbb89c6c509fec2c3f2eef715966c 100644 --- a/Tracking/TrkFitter/TrkGaussianSumFilterUtils/CMakeLists.txt +++ b/Tracking/TrkFitter/TrkGaussianSumFilterUtils/CMakeLists.txt @@ -32,6 +32,5 @@ atlas_add_test(ut_GSF_testHorner atlas_add_test(ut_GSF_findIndexOfMinimum SOURCES test/testFindIndexOfMinimum.cxx - LINK_LIBRARIES CxxUtils TrkGaussianSumFilterUtilsLib - POST_EXEC_SCRIPT nopost.sh ) + LINK_LIBRARIES CxxUtils TrkGaussianSumFilterUtilsLib) diff --git a/Tracking/TrkFitter/TrkGaussianSumFilterUtils/TrkGaussianSumFilterUtils/GSFFindIndexOfMimimum.h b/Tracking/TrkFitter/TrkGaussianSumFilterUtils/TrkGaussianSumFilterUtils/GSFFindIndexOfMimimum.h index 2f748eb3d73133461fd53a7fc91493d54eaebbd6..3bfd0f6ff7057e494d8316112b15e92d75c3c627 100644 --- a/Tracking/TrkFitter/TrkGaussianSumFilterUtils/TrkGaussianSumFilterUtils/GSFFindIndexOfMimimum.h +++ b/Tracking/TrkFitter/TrkGaussianSumFilterUtils/TrkGaussianSumFilterUtils/GSFFindIndexOfMimimum.h @@ -32,10 +32,9 @@ * * - A "C" implementation * - A "STL" implementation - * - A "Vec" implementation using blend - * - A "Vec" implementation that is faster - * than the above when the inputs are not - * ordered. + * - A "Vec" implementation always tracking the index + * - A "Vec" implementation that updates the index when an new minimum is + * foiund. This can be faster than the above when the inputs are not ordered. * * We provide a convenient entry method * to select in compile time an implementation @@ -83,7 +82,7 @@ int32_t scalarSTL(const float* distancesIn, int n) { // index of minimum vec with blend ATH_ALWAYS_INLINE -int32_t vecBlend(const float* distancesIn, int n) { +int32_t vecAlwaysTrackIdx(const float* distancesIn, int n) { using namespace CxxUtils; const float* array = CxxUtils::assume_aligned<GSFConstants::alignment>(distancesIn); @@ -94,19 +93,19 @@ int32_t vecBlend(const float* distancesIn, int n) { vec<int, 4> indices3 = {8, 9, 10, 11}; vec<int, 4> indices4 = {12, 13, 14, 15}; - vec<int, 4> minindices1 = indices1; - vec<int, 4> minindices2 = indices2; - vec<int, 4> minindices3 = indices3; - vec<int, 4> minindices4 = indices4; + vec<int, 4> minIndices1 = indices1; + vec<int, 4> minIndices2 = indices2; + vec<int, 4> minIndices3 = indices3; + vec<int, 4> minIndices4 = indices4; - vec<float, 4> minvalues1; - vec<float, 4> minvalues2; - vec<float, 4> minvalues3; - vec<float, 4> minvalues4; - vload(minvalues1, array); - vload(minvalues2, array + 4); - vload(minvalues3, array + 8); - vload(minvalues4, array + 12); + vec<float, 4> minValues1; + vec<float, 4> minValues2; + vec<float, 4> minValues3; + vec<float, 4> minValues4; + vload(minValues1, array); + vload(minValues2, array + 4); + vload(minValues3, array + 8); + vload(minValues4, array + 12); vec<float, 4> values1; vec<float, 4> values2; @@ -116,61 +115,60 @@ int32_t vecBlend(const float* distancesIn, int n) { // 1 vload(values1, array + i); // 0-3 indices1 = indices1 + increment; - vec<int, 4> lt1 = values1 < minvalues1; - vselect(minindices1, indices1, minindices1, lt1); - vmin(minvalues1, values1, minvalues1); + vec<int, 4> lt1 = values1 < minValues1; + vselect(minIndices1, indices1, minIndices1, lt1); + vmin(minValues1, values1, minValues1); // 2 vload(values2, array + i + 4); // 4-7 indices2 = indices2 + increment; - vec<int, 4> lt2 = values2 < minvalues2; - vselect(minindices2, indices2, minindices2, lt2); - vmin(minvalues2, values2, minvalues2); + vec<int, 4> lt2 = values2 < minValues2; + vselect(minIndices2, indices2, minIndices2, lt2); + vmin(minValues2, values2, minValues2); // 3 vload(values3, array + i + 8); // 8-11 indices3 = indices3 + increment; - vec<int, 4> lt3 = values3 < minvalues3; - vselect(minindices3, indices3, minindices3, lt3); - vmin(minvalues3, values3, minvalues3); + vec<int, 4> lt3 = values3 < minValues3; + vselect(minIndices3, indices3, minIndices3, lt3); + vmin(minValues3, values3, minValues3); // 4 vload(values4, array + i + 12); // 12-15 indices4 = indices4 + increment; - vec<int, 4> lt4 = values4 < minvalues4; - vselect(minindices4, indices4, minindices4, lt4); - vmin(minvalues4, values4, minvalues4); + vec<int, 4> lt4 = values4 < minValues4; + vselect(minIndices4, indices4, minIndices4, lt4); + vmin(minValues4, values4, minValues4); } - // Compare //1 with //2 minimum becomes 1 - vec<int, 4> lt12 = minvalues1 < minvalues2; - vselect(minindices1, minindices1, minindices2, lt12); - vmin(minvalues1, minvalues1, minvalues2); - // compare //3 with //4 minimum becomes 3 - vec<int, 4> lt34 = minvalues3 < minvalues4; - vselect(minindices3, minindices3, minindices4, lt34); - vmin(minvalues3, minvalues3, minvalues4); - // Compare //1 with //3 minimum becomes 1 - vec<int, 4> lt13 = minvalues1 < minvalues3; - vselect(minindices1, minindices1, minindices3, lt13); - vmin(minvalues1, minvalues1, minvalues3); - // Do the final calculation scalar way - int minindices[4]; - float minvalues[4]; - vstore(minvalues, minvalues1); - vstore(minindices, minindices1); - size_t minIndex = minindices[0]; - float minvalue = minvalues[0]; + float minValues[16]; + int minIndices[16]; + vstore(minValues, minValues1); + vstore(minValues + 4, minValues2); + vstore(minValues + 8, minValues3); + vstore(minValues + 12, minValues4); + vstore(minIndices, minIndices1); + vstore(minIndices + 4, minIndices2); + vstore(minIndices + 8, minIndices3); + vstore(minIndices + 12, minIndices4); - for (size_t i = 1; i < 4; ++i) { - const float value = minvalues[i]; - if (value < minvalue) { - minvalue = value; - minIndex = minindices[i]; + float minValue = minValues[0]; + int32_t minIndex = minIndices[0]; + for (size_t i = 1; i < 16; ++i) { + const float value = minValues[i]; + const int32_t index = minIndices[i]; + if (value < minValue) { + minValue = value; + minIndex = index; + } else if (value == minValue && index < minIndex) { + // we want to return the smallest index + // in case of 2 same values + minIndex = index; } } return minIndex; } + // index of minimum vec quite fast for the "unordered"/"random" case ATH_ALWAYS_INLINE -int32_t vecUnordered(const float* distancesIn, int n) { +int32_t vecUpdateIdxOnNewMin(const float* distancesIn, int n) { using namespace CxxUtils; const float* array = CxxUtils::assume_aligned<GSFConstants::alignment>(distancesIn); @@ -248,17 +246,18 @@ int32_t vecUnordered(const float* distancesIn, int n) { } // namespace findMinimumIndexDetail namespace findMinimumIndex { -enum Impl { VecUnordered = 0, VecBlend = 1, C = 2, STL = 3 }; +enum Impl { VecUpdateIdxOnNewMin = 0, VecAlwaysTrackIdx = 1, C = 2, STL = 3 }; template <enum Impl I> ATH_ALWAYS_INLINE int32_t impl(const float* distancesIn, int n) { - static_assert(I == VecUnordered || I == VecBlend || I == C || I == STL, - "Not a valid implementation chosen"); - if constexpr (I == VecUnordered) { - return findMinimumIndexDetail::vecUnordered(distancesIn, n); - } else if constexpr (I == VecBlend) { - return findMinimumIndexDetail::vecBlend(distancesIn, n); + static_assert( + I == VecUpdateIdxOnNewMin || I == VecAlwaysTrackIdx || I == C || I == STL, + "Not a valid implementation chosen"); + if constexpr (I == VecUpdateIdxOnNewMin) { + return findMinimumIndexDetail::vecUpdateIdxOnNewMin(distancesIn, n); + } else if constexpr (I == VecAlwaysTrackIdx) { + return findMinimumIndexDetail::vecAlwaysTrackIdx(distancesIn, n); } else if constexpr (I == C) { return findMinimumIndexDetail::scalarC(distancesIn, n); } else if constexpr (I == STL) { diff --git a/Tracking/TrkFitter/TrkGaussianSumFilterUtils/share/ut_GSF_findIndexOfMinimum.ref b/Tracking/TrkFitter/TrkGaussianSumFilterUtils/share/ut_GSF_findIndexOfMinimum.ref new file mode 100644 index 0000000000000000000000000000000000000000..ae65e3238c5f7372eb0c4918c4ab7600c201fcb2 --- /dev/null +++ b/Tracking/TrkFitter/TrkGaussianSumFilterUtils/share/ut_GSF_findIndexOfMinimum.ref @@ -0,0 +1,4 @@ +C Minimum index : 17 with value 0 +STL Minimum index : 17 with value 0 +VecAlwaysTrackIdx Minimum index : 17 with value 0 +VecUpdateIdxOnNewMin Minimum index : 17 with value 0 diff --git a/Tracking/TrkFitter/TrkGaussianSumFilterUtils/src/KLGaussianMixtureReduction.cxx b/Tracking/TrkFitter/TrkGaussianSumFilterUtils/src/KLGaussianMixtureReduction.cxx index b9e7cc6ad632bd8602f0aac0fc3cf4a46a6f0006..bd97c5cf44fab9c1a0fd8054dad8372cadebb369 100644 --- a/Tracking/TrkFitter/TrkGaussianSumFilterUtils/src/KLGaussianMixtureReduction.cxx +++ b/Tracking/TrkFitter/TrkGaussianSumFilterUtils/src/KLGaussianMixtureReduction.cxx @@ -343,7 +343,7 @@ findMergesImpl(const Component1DArray& componentsIn, while (numberOfComponentsLeft > reducedSize) { // find pair with minimum distance const int32_t minIndex = - findMinimumIndex::impl<findMinimumIndex::VecBlend>( + findMinimumIndex::impl<findMinimumIndex::VecAlwaysTrackIdx>( distances.buffer(), nnpadded); const triangularToIJ conversion = convert(minIndex); int8_t minTo = conversion.I; diff --git a/Tracking/TrkFitter/TrkGaussianSumFilterUtils/test/testFindIndexOfMinimum.cxx b/Tracking/TrkFitter/TrkGaussianSumFilterUtils/test/testFindIndexOfMinimum.cxx index adf78cb23747b723fc86c46d07544b9e59c375bd..54db21ccd354a1aa03ac466c1a683a4e03bddf3a 100644 --- a/Tracking/TrkFitter/TrkGaussianSumFilterUtils/test/testFindIndexOfMinimum.cxx +++ b/Tracking/TrkFitter/TrkGaussianSumFilterUtils/test/testFindIndexOfMinimum.cxx @@ -17,9 +17,17 @@ struct InitArray { InitArray() : distances(N) { std::random_device rd; std::mt19937 gen(rd()); - std::uniform_real_distribution<> dis(0.001, 10.0); + std::uniform_real_distribution<> dis(0.001, 5.0); for (size_t i = 0; i < N; ++i) { distances[i] = dis(gen); + // At the end of the "vectorized loops" + // 36 will the 1st element of the second SIMD vec + // 1024 will the 1st element of the first SIMD vec + // 17 will the 2nd eleament of the fourth SIMD vec + // 40 will the 1st eleament of the third SIMD vec + if (i == 17 || i == 40 || i == 36 || i == 1024) { + distances[i] = 0.0; + } } } GSFUtils::AlignedDynArray<float, GSFConstants::alignment> distances; @@ -41,16 +49,16 @@ static void findMinimumIndexSTL() { } static void findMinimumIndexVecBlend() { - int32_t minIndex = findMinimumIndex::impl<findMinimumIndex::VecBlend>( + int32_t minIndex = findMinimumIndex::impl<findMinimumIndex::VecAlwaysTrackIdx>( initArray.distances.buffer(), N); - std::cout << "Vec Minimum index Blend : " << minIndex << " with value " + std::cout << "VecAlwaysTrackIdx Minimum index : " << minIndex << " with value " << initArray.distances[minIndex] << '\n'; } static void findMinimumIndexVecUnordered() { - int32_t minIndex = findMinimumIndex::impl<findMinimumIndex::VecUnordered>( + int32_t minIndex = findMinimumIndex::impl<findMinimumIndex::VecUpdateIdxOnNewMin>( initArray.distances.buffer(), N); - std::cout << "Vec Minimum index Unordered : " << minIndex << " with value " + std::cout << "VecUpdateIdxOnNewMin Minimum index : " << minIndex << " with value " << initArray.distances[minIndex] << '\n'; }