Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
athena
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Requirements
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Locked files
Deploy
Releases
Package Registry
Container Registry
Model registry
Operate
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Peter Sherwood
athena
Commits
2716530e
Commit
2716530e
authored
4 years ago
by
Christos Anastopoulos
Browse files
Options
Downloads
Patches
Plain Diff
fix comments after the bugfix
parent
481b6029
No related branches found
Branches containing commit
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
Tracking/TrkFitter/TrkGaussianSumFilter/src/KLGaussianMixtureReduction.cxx
+17
-37
17 additions, 37 deletions
...r/TrkGaussianSumFilter/src/KLGaussianMixtureReduction.cxx
with
17 additions
and
37 deletions
Tracking/TrkFitter/TrkGaussianSumFilter/src/KLGaussianMixtureReduction.cxx
+
17
−
37
View file @
2716530e
...
@@ -16,24 +16,20 @@
...
@@ -16,24 +16,20 @@
#endif
#endif
#endif
#endif
//This enables -ftree-vectorize in gcc (we compile with O2)
ATH_ENABLE_VECTORIZATION
;
ATH_ENABLE_VECTORIZATION
;
namespace
{
namespace
{
/*
/*
*
* Component Merging helper methods
* Component Merging helper methods
*/
*/
using
namespace
GSFUtils
;
using
namespace
GSFUtils
;
/**
/**
* Based on
* Based on
* https://
arxiv.org/pdf/2001.00727.pdf
* https://
www.sciencedirect.com/science/article/pii/089812218990103X
* equation (1
0
)
* equation (1
6
)
* but not accounting for weights
* but not accounting for weights
* i.e
* (covI- covJ) (invCovJ - invCovI ) + (mean1-mean2) (invcov+invcov) (mean1-mean2)
* or directly from 10
* covI * invCovJ + covJ * invCovI + (mean1-mean2) (invcov+invcov) (mean1-mean2)
* covI * invCovJ + covJ * invCovI + (mean1-mean2) (invcov+invcov) (mean1-mean2)
*
*/
*/
[[
maybe_unused
]]
[[
maybe_unused
]]
float
float
...
@@ -49,6 +45,7 @@ symmetricKL(const Component1D& componentI,
...
@@ -49,6 +45,7 @@ symmetricKL(const Component1D& componentI,
/**
/**
* https://arxiv.org/pdf/2001.00727.pdf
* https://arxiv.org/pdf/2001.00727.pdf
* equation (10)
* equation (10)
* Same as above but accounting for weights
*/
*/
[[
maybe_unused
]]
[[
maybe_unused
]]
float
float
...
@@ -94,15 +91,12 @@ combine(GSFUtils::Component1D& updated,
...
@@ -94,15 +91,12 @@ combine(GSFUtils::Component1D& updated,
updated
.
invCov
=
1.
/
sumVariance
;
updated
.
invCov
=
1.
/
sumVariance
;
updated
.
weight
=
sumWeight
;
updated
.
weight
=
sumWeight
;
//
qoveP and qoverp cov is normally a small number
//
large numbers to enter the multiplications/sums
//
invCov is a large one and weight <1
//
make distance large
removed
.
mean
=
1e10
;
removed
.
mean
=
1e10
;
removed
.
cov
=
1e10
;
removed
.
cov
=
1e10
;
removed
.
invCov
=
1e10
;
removed
.
invCov
=
1e10
;
removed
.
weight
=
1
;
removed
.
weight
=
1
;
//this should be (eps-0) * (1e10 - eps)
}
}
/**
/**
...
@@ -122,13 +116,14 @@ recalculateDistances(const componentPtrRestrict componentsIn,
...
@@ -122,13 +116,14 @@ recalculateDistances(const componentPtrRestrict componentsIn,
const
int32_t
j
=
mini
;
const
int32_t
j
=
mini
;
const
int32_t
indexConst
=
(
j
-
1
)
*
j
/
2
;
const
int32_t
indexConst
=
(
j
-
1
)
*
j
/
2
;
//
Element at the same raw of mini/j
//
Rows
const
Component1D
componentJ
=
components
[
j
];
const
Component1D
componentJ
=
components
[
j
];
for
(
int32_t
i
=
0
;
i
<
j
;
++
i
)
{
for
(
int32_t
i
=
0
;
i
<
j
;
++
i
)
{
const
Component1D
componentI
=
components
[
i
];
const
Component1D
componentI
=
components
[
i
];
const
int32_t
index
=
indexConst
+
i
;
const
int32_t
index
=
indexConst
+
i
;
distances
[
index
]
=
symmetricKL
(
componentI
,
componentJ
);
distances
[
index
]
=
symmetricKL
(
componentI
,
componentJ
);
}
}
//Columns
for
(
int32_t
i
=
j
+
1
;
i
<
n
;
++
i
)
{
for
(
int32_t
i
=
j
+
1
;
i
<
n
;
++
i
)
{
const
int32_t
index
=
(
i
-
1
)
*
i
/
2
+
j
;
const
int32_t
index
=
(
i
-
1
)
*
i
/
2
+
j
;
const
Component1D
componentI
=
components
[
i
];
const
Component1D
componentI
=
components
[
i
];
...
@@ -148,7 +143,6 @@ calculateAllDistances(const componentPtrRestrict componentsIn,
...
@@ -148,7 +143,6 @@ calculateAllDistances(const componentPtrRestrict componentsIn,
const
Component1D
*
components
=
const
Component1D
*
components
=
static_cast
<
const
Component1D
*>
(
__builtin_assume_aligned
(
componentsIn
,
alignment
));
static_cast
<
const
Component1D
*>
(
__builtin_assume_aligned
(
componentsIn
,
alignment
));
float
*
distances
=
static_cast
<
float
*>
(
__builtin_assume_aligned
(
distancesIn
,
alignment
));
float
*
distances
=
static_cast
<
float
*>
(
__builtin_assume_aligned
(
distancesIn
,
alignment
));
for
(
int32_t
i
=
1
;
i
<
n
;
++
i
)
{
for
(
int32_t
i
=
1
;
i
<
n
;
++
i
)
{
const
int32_t
indexConst
=
(
i
-
1
)
*
i
/
2
;
const
int32_t
indexConst
=
(
i
-
1
)
*
i
/
2
;
const
Component1D
componentI
=
components
[
i
];
const
Component1D
componentI
=
components
[
i
];
...
@@ -170,9 +164,11 @@ resetDistances(floatPtrRestrict distancesIn,
...
@@ -170,9 +164,11 @@ resetDistances(floatPtrRestrict distancesIn,
float
*
distances
=
(
float
*
)
__builtin_assume_aligned
(
distancesIn
,
alignment
);
float
*
distances
=
(
float
*
)
__builtin_assume_aligned
(
distancesIn
,
alignment
);
const
int32_t
j
=
minj
;
const
int32_t
j
=
minj
;
const
int32_t
indexConst
=
(
j
-
1
)
*
j
/
2
;
const
int32_t
indexConst
=
(
j
-
1
)
*
j
/
2
;
//Rows
for
(
int32_t
i
=
0
;
i
<
j
;
++
i
)
{
for
(
int32_t
i
=
0
;
i
<
j
;
++
i
)
{
distances
[
indexConst
+
i
]
=
std
::
numeric_limits
<
float
>::
max
();
distances
[
indexConst
+
i
]
=
std
::
numeric_limits
<
float
>::
max
();
}
}
//Columns
for
(
int32_t
i
=
j
+
1
;
i
<
n
;
++
i
)
{
for
(
int32_t
i
=
j
+
1
;
i
<
n
;
++
i
)
{
const
int32_t
index
=
(
i
-
1
)
*
i
/
2
+
j
;
const
int32_t
index
=
(
i
-
1
)
*
i
/
2
+
j
;
distances
[
index
]
=
std
::
numeric_limits
<
float
>::
max
();
distances
[
index
]
=
std
::
numeric_limits
<
float
>::
max
();
...
@@ -241,23 +237,15 @@ findMerges(componentPtrRestrict componentsIn,
...
@@ -241,23 +237,15 @@ findMerges(componentPtrRestrict componentsIn,
}
// end of merge while
}
// end of merge while
return
merges
;
return
merges
;
}
}
/*
/*
* findMinimumIndex
* findMinimumIndex
*
*
* For FindMinimumIndex at x86_64 we have
* For FindMinimumIndex at x86_64 we have
* AVX2
and
SSE versions
* AVX2
,SSE4.1,
SSE
2
versions
* These assume that the number of elements is a multiple
* These assume that the number of elements is a multiple
* of 8 and are to be used for sizeable inputs.
* of 8 and are to be used for sizeable inputs.
*
*
* We also provide a default "scalar" implementation
* We also provide a default "scalar" implementation
*
* One of the issues we have see in that gcc8.3 and clang8 (02/2020)
* optimise differently:
* https://its.cern.ch/jira/projects/ATLASRECTS/issues/ATLASRECTS-5244
*
*/
*/
#if HAVE_FUNCTION_MULTIVERSIONING
#if HAVE_FUNCTION_MULTIVERSIONING
#if defined(__x86_64__)
#if defined(__x86_64__)
...
@@ -313,9 +301,7 @@ findMinimumIndex(const floatPtrRestrict distancesIn, const int n)
...
@@ -313,9 +301,7 @@ findMinimumIndex(const floatPtrRestrict distancesIn, const int n)
minindices
=
_mm256_blendv_epi8
(
minindices
,
indicesIn
,
lt
);
minindices
=
_mm256_blendv_epi8
(
minindices
,
indicesIn
,
lt
);
minvalues
=
_mm256_min_ps
(
values
,
minvalues
);
minvalues
=
_mm256_min_ps
(
values
,
minvalues
);
}
}
/*
//Do the final calculation scalar way
* Do the final calculation scalar way
*/
alignas
(
alignment
)
float
distances
[
8
];
alignas
(
alignment
)
float
distances
[
8
];
alignas
(
alignment
)
int32_t
indices
[
8
];
alignas
(
alignment
)
int32_t
indices
[
8
];
_mm256_store_ps
(
distances
,
minvalues
);
_mm256_store_ps
(
distances
,
minvalues
);
...
@@ -363,8 +349,7 @@ std::pair<int32_t,float>
...
@@ -363,8 +349,7 @@ std::pair<int32_t,float>
findMinimumIndex
(
const
floatPtrRestrict
distancesIn
,
const
int
n
)
findMinimumIndex
(
const
floatPtrRestrict
distancesIn
,
const
int
n
)
{
{
float
*
array
=
(
float
*
)
__builtin_assume_aligned
(
distancesIn
,
alignment
);
float
*
array
=
(
float
*
)
__builtin_assume_aligned
(
distancesIn
,
alignment
);
/* Assuming SSE do 2 vectors of 4 elements in a time
//Do 2 vectors of 4 elements , so 8 at time
* one might want to revisit for AVX2 */
const
__m128i
increment
=
_mm_set1_epi32
(
8
);
const
__m128i
increment
=
_mm_set1_epi32
(
8
);
__m128i
indices1
=
_mm_setr_epi32
(
0
,
1
,
2
,
3
);
__m128i
indices1
=
_mm_setr_epi32
(
0
,
1
,
2
,
3
);
__m128i
indices2
=
_mm_setr_epi32
(
4
,
5
,
6
,
7
);
__m128i
indices2
=
_mm_setr_epi32
(
4
,
5
,
6
,
7
);
...
@@ -388,9 +373,7 @@ findMinimumIndex(const floatPtrRestrict distancesIn, const int n)
...
@@ -388,9 +373,7 @@ findMinimumIndex(const floatPtrRestrict distancesIn, const int n)
minindices2
=
_mm_blendv_epi8
(
minindices2
,
indices2
,
lt2
);
minindices2
=
_mm_blendv_epi8
(
minindices2
,
indices2
,
lt2
);
minvalues2
=
_mm_min_ps
(
values2
,
minvalues2
);
minvalues2
=
_mm_min_ps
(
values2
,
minvalues2
);
}
}
/*
//Do the final calculation scalar way
* Do the final calculation scalar way
*/
alignas
(
alignment
)
float
distances
[
8
];
alignas
(
alignment
)
float
distances
[
8
];
alignas
(
alignment
)
int32_t
indices
[
8
];
alignas
(
alignment
)
int32_t
indices
[
8
];
_mm_store_ps
(
distances
,
minvalues1
);
_mm_store_ps
(
distances
,
minvalues1
);
...
@@ -426,8 +409,7 @@ std::pair<int32_t,float>
...
@@ -426,8 +409,7 @@ std::pair<int32_t,float>
findMinimumIndex
(
const
floatPtrRestrict
distancesIn
,
const
int
n
)
findMinimumIndex
(
const
floatPtrRestrict
distancesIn
,
const
int
n
)
{
{
float
*
array
=
(
float
*
)
__builtin_assume_aligned
(
distancesIn
,
alignment
);
float
*
array
=
(
float
*
)
__builtin_assume_aligned
(
distancesIn
,
alignment
);
/* Assuming SSE do 2 vectors of 4 elements in a time
//Do 2 vectors of 4 elements, so 8 at a time
* one might want to revisit for AVX2 */
const
__m128i
increment
=
_mm_set1_epi32
(
8
);
const
__m128i
increment
=
_mm_set1_epi32
(
8
);
__m128i
indices1
=
_mm_setr_epi32
(
0
,
1
,
2
,
3
);
__m128i
indices1
=
_mm_setr_epi32
(
0
,
1
,
2
,
3
);
__m128i
indices2
=
_mm_setr_epi32
(
4
,
5
,
6
,
7
);
__m128i
indices2
=
_mm_setr_epi32
(
4
,
5
,
6
,
7
);
...
@@ -451,9 +433,7 @@ findMinimumIndex(const floatPtrRestrict distancesIn, const int n)
...
@@ -451,9 +433,7 @@ findMinimumIndex(const floatPtrRestrict distancesIn, const int n)
minindices2
=
SSE2_mm_blendv_epi8
(
minindices2
,
indices2
,
lt2
);
minindices2
=
SSE2_mm_blendv_epi8
(
minindices2
,
indices2
,
lt2
);
minvalues2
=
_mm_min_ps
(
values2
,
minvalues2
);
minvalues2
=
_mm_min_ps
(
values2
,
minvalues2
);
}
}
/*
//Do the final calculation scalar way
* Do the final calculation scalar way
*/
alignas
(
alignment
)
float
distances
[
8
];
alignas
(
alignment
)
float
distances
[
8
];
alignas
(
alignment
)
int32_t
indices
[
8
];
alignas
(
alignment
)
int32_t
indices
[
8
];
_mm_store_ps
(
distances
,
minvalues1
);
_mm_store_ps
(
distances
,
minvalues1
);
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment