Skip to content
Snippets Groups Projects
Commit 21960c88 authored by Christos Anastopoulos's avatar Christos Anastopoulos Committed by Frank Winklmeier
Browse files

Document/Test/Fix the "stability" of find index of minimum impl

Document/Test/Fix the "stability" of find index of minimum impl
parent a496c7f5
No related branches found
No related tags found
1 merge request!67962Document/Test/Fix the "stability" of find index of minimum impl
......@@ -24,9 +24,9 @@ references_map = {
"d1759": "v10",
# Reco
"q442": "v41",
"q443": "v30",
"q443": "v31",
"q445": "v53",
"q449": "v56",
"q449": "v57",
# Derivations
"data_PHYS_Run2": "v15",
"data_PHYS_Run3": "v14",
......
......@@ -32,6 +32,5 @@ atlas_add_test(ut_GSF_testHorner
atlas_add_test(ut_GSF_findIndexOfMinimum
SOURCES test/testFindIndexOfMinimum.cxx
LINK_LIBRARIES CxxUtils TrkGaussianSumFilterUtilsLib
POST_EXEC_SCRIPT nopost.sh )
LINK_LIBRARIES CxxUtils TrkGaussianSumFilterUtilsLib)
......@@ -32,10 +32,9 @@
*
* - A "C" implementation
* - A "STL" implementation
* - A "Vec" implementation using blend
* - A "Vec" implementation that is faster
* than the above when the inputs are not
* ordered.
* - A "Vec" implementation always tracking the index
* - A "Vec" implementation that updates the index when an new minimum is
* foiund. This can be faster than the above when the inputs are not ordered.
*
* We provide a convenient entry method
* to select in compile time an implementation
......@@ -83,7 +82,7 @@ int32_t scalarSTL(const float* distancesIn, int n) {
// index of minimum vec with blend
ATH_ALWAYS_INLINE
int32_t vecBlend(const float* distancesIn, int n) {
int32_t vecAlwaysTrackIdx(const float* distancesIn, int n) {
using namespace CxxUtils;
const float* array =
CxxUtils::assume_aligned<GSFConstants::alignment>(distancesIn);
......@@ -94,19 +93,19 @@ int32_t vecBlend(const float* distancesIn, int n) {
vec<int, 4> indices3 = {8, 9, 10, 11};
vec<int, 4> indices4 = {12, 13, 14, 15};
vec<int, 4> minindices1 = indices1;
vec<int, 4> minindices2 = indices2;
vec<int, 4> minindices3 = indices3;
vec<int, 4> minindices4 = indices4;
vec<int, 4> minIndices1 = indices1;
vec<int, 4> minIndices2 = indices2;
vec<int, 4> minIndices3 = indices3;
vec<int, 4> minIndices4 = indices4;
vec<float, 4> minvalues1;
vec<float, 4> minvalues2;
vec<float, 4> minvalues3;
vec<float, 4> minvalues4;
vload(minvalues1, array);
vload(minvalues2, array + 4);
vload(minvalues3, array + 8);
vload(minvalues4, array + 12);
vec<float, 4> minValues1;
vec<float, 4> minValues2;
vec<float, 4> minValues3;
vec<float, 4> minValues4;
vload(minValues1, array);
vload(minValues2, array + 4);
vload(minValues3, array + 8);
vload(minValues4, array + 12);
vec<float, 4> values1;
vec<float, 4> values2;
......@@ -116,61 +115,60 @@ int32_t vecBlend(const float* distancesIn, int n) {
// 1
vload(values1, array + i); // 0-3
indices1 = indices1 + increment;
vec<int, 4> lt1 = values1 < minvalues1;
vselect(minindices1, indices1, minindices1, lt1);
vmin(minvalues1, values1, minvalues1);
vec<int, 4> lt1 = values1 < minValues1;
vselect(minIndices1, indices1, minIndices1, lt1);
vmin(minValues1, values1, minValues1);
// 2
vload(values2, array + i + 4); // 4-7
indices2 = indices2 + increment;
vec<int, 4> lt2 = values2 < minvalues2;
vselect(minindices2, indices2, minindices2, lt2);
vmin(minvalues2, values2, minvalues2);
vec<int, 4> lt2 = values2 < minValues2;
vselect(minIndices2, indices2, minIndices2, lt2);
vmin(minValues2, values2, minValues2);
// 3
vload(values3, array + i + 8); // 8-11
indices3 = indices3 + increment;
vec<int, 4> lt3 = values3 < minvalues3;
vselect(minindices3, indices3, minindices3, lt3);
vmin(minvalues3, values3, minvalues3);
vec<int, 4> lt3 = values3 < minValues3;
vselect(minIndices3, indices3, minIndices3, lt3);
vmin(minValues3, values3, minValues3);
// 4
vload(values4, array + i + 12); // 12-15
indices4 = indices4 + increment;
vec<int, 4> lt4 = values4 < minvalues4;
vselect(minindices4, indices4, minindices4, lt4);
vmin(minvalues4, values4, minvalues4);
vec<int, 4> lt4 = values4 < minValues4;
vselect(minIndices4, indices4, minIndices4, lt4);
vmin(minValues4, values4, minValues4);
}
// Compare //1 with //2 minimum becomes 1
vec<int, 4> lt12 = minvalues1 < minvalues2;
vselect(minindices1, minindices1, minindices2, lt12);
vmin(minvalues1, minvalues1, minvalues2);
// compare //3 with //4 minimum becomes 3
vec<int, 4> lt34 = minvalues3 < minvalues4;
vselect(minindices3, minindices3, minindices4, lt34);
vmin(minvalues3, minvalues3, minvalues4);
// Compare //1 with //3 minimum becomes 1
vec<int, 4> lt13 = minvalues1 < minvalues3;
vselect(minindices1, minindices1, minindices3, lt13);
vmin(minvalues1, minvalues1, minvalues3);
// Do the final calculation scalar way
int minindices[4];
float minvalues[4];
vstore(minvalues, minvalues1);
vstore(minindices, minindices1);
size_t minIndex = minindices[0];
float minvalue = minvalues[0];
float minValues[16];
int minIndices[16];
vstore(minValues, minValues1);
vstore(minValues + 4, minValues2);
vstore(minValues + 8, minValues3);
vstore(minValues + 12, minValues4);
vstore(minIndices, minIndices1);
vstore(minIndices + 4, minIndices2);
vstore(minIndices + 8, minIndices3);
vstore(minIndices + 12, minIndices4);
for (size_t i = 1; i < 4; ++i) {
const float value = minvalues[i];
if (value < minvalue) {
minvalue = value;
minIndex = minindices[i];
float minValue = minValues[0];
int32_t minIndex = minIndices[0];
for (size_t i = 1; i < 16; ++i) {
const float value = minValues[i];
const int32_t index = minIndices[i];
if (value < minValue) {
minValue = value;
minIndex = index;
} else if (value == minValue && index < minIndex) {
// we want to return the smallest index
// in case of 2 same values
minIndex = index;
}
}
return minIndex;
}
// index of minimum vec quite fast for the "unordered"/"random" case
ATH_ALWAYS_INLINE
int32_t vecUnordered(const float* distancesIn, int n) {
int32_t vecUpdateIdxOnNewMin(const float* distancesIn, int n) {
using namespace CxxUtils;
const float* array =
CxxUtils::assume_aligned<GSFConstants::alignment>(distancesIn);
......@@ -248,17 +246,18 @@ int32_t vecUnordered(const float* distancesIn, int n) {
} // namespace findMinimumIndexDetail
namespace findMinimumIndex {
enum Impl { VecUnordered = 0, VecBlend = 1, C = 2, STL = 3 };
enum Impl { VecUpdateIdxOnNewMin = 0, VecAlwaysTrackIdx = 1, C = 2, STL = 3 };
template <enum Impl I>
ATH_ALWAYS_INLINE int32_t impl(const float* distancesIn, int n) {
static_assert(I == VecUnordered || I == VecBlend || I == C || I == STL,
"Not a valid implementation chosen");
if constexpr (I == VecUnordered) {
return findMinimumIndexDetail::vecUnordered(distancesIn, n);
} else if constexpr (I == VecBlend) {
return findMinimumIndexDetail::vecBlend(distancesIn, n);
static_assert(
I == VecUpdateIdxOnNewMin || I == VecAlwaysTrackIdx || I == C || I == STL,
"Not a valid implementation chosen");
if constexpr (I == VecUpdateIdxOnNewMin) {
return findMinimumIndexDetail::vecUpdateIdxOnNewMin(distancesIn, n);
} else if constexpr (I == VecAlwaysTrackIdx) {
return findMinimumIndexDetail::vecAlwaysTrackIdx(distancesIn, n);
} else if constexpr (I == C) {
return findMinimumIndexDetail::scalarC(distancesIn, n);
} else if constexpr (I == STL) {
......
C Minimum index : 17 with value 0
STL Minimum index : 17 with value 0
VecAlwaysTrackIdx Minimum index : 17 with value 0
VecUpdateIdxOnNewMin Minimum index : 17 with value 0
......@@ -343,7 +343,7 @@ findMergesImpl(const Component1DArray& componentsIn,
while (numberOfComponentsLeft > reducedSize) {
// find pair with minimum distance
const int32_t minIndex =
findMinimumIndex::impl<findMinimumIndex::VecBlend>(
findMinimumIndex::impl<findMinimumIndex::VecAlwaysTrackIdx>(
distances.buffer(), nnpadded);
const triangularToIJ conversion = convert(minIndex);
int8_t minTo = conversion.I;
......
......@@ -17,9 +17,17 @@ struct InitArray {
InitArray() : distances(N) {
std::random_device rd;
std::mt19937 gen(rd());
std::uniform_real_distribution<> dis(0.001, 10.0);
std::uniform_real_distribution<> dis(0.001, 5.0);
for (size_t i = 0; i < N; ++i) {
distances[i] = dis(gen);
// At the end of the "vectorized loops"
// 36 will the 1st element of the second SIMD vec
// 1024 will the 1st element of the first SIMD vec
// 17 will the 2nd eleament of the fourth SIMD vec
// 40 will the 1st eleament of the third SIMD vec
if (i == 17 || i == 40 || i == 36 || i == 1024) {
distances[i] = 0.0;
}
}
}
GSFUtils::AlignedDynArray<float, GSFConstants::alignment> distances;
......@@ -41,16 +49,16 @@ static void findMinimumIndexSTL() {
}
static void findMinimumIndexVecBlend() {
int32_t minIndex = findMinimumIndex::impl<findMinimumIndex::VecBlend>(
int32_t minIndex = findMinimumIndex::impl<findMinimumIndex::VecAlwaysTrackIdx>(
initArray.distances.buffer(), N);
std::cout << "Vec Minimum index Blend : " << minIndex << " with value "
std::cout << "VecAlwaysTrackIdx Minimum index : " << minIndex << " with value "
<< initArray.distances[minIndex] << '\n';
}
static void findMinimumIndexVecUnordered() {
int32_t minIndex = findMinimumIndex::impl<findMinimumIndex::VecUnordered>(
int32_t minIndex = findMinimumIndex::impl<findMinimumIndex::VecUpdateIdxOnNewMin>(
initArray.distances.buffer(), N);
std::cout << "Vec Minimum index Unordered : " << minIndex << " with value "
std::cout << "VecUpdateIdxOnNewMin Minimum index : " << minIndex << " with value "
<< initArray.distances[minIndex] << '\n';
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment