Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
John Apostolakis
VecGeom
Commits
4680b789
Commit
4680b789
authored
Aug 03, 2021
by
Andrei Gheata
Browse files
Fix bounding boxes on GPU by copying the values calculated on host.
parent
383c7a5e
Changes
7
Hide whitespace changes
Inline
Side-by-side
VecGeom/backend/cuda/Interface.h
View file @
4680b789
...
...
@@ -48,7 +48,7 @@ __global__ void ConstructArrayOnGpu(DataClass *gpu_ptr, size_t nElements, ArgsTy
* \param params Array(s) of constructor parameters for each object.
*/
template
<
typename
DataClass
,
typename
...
ArgsTypes
>
__global__
void
ConstructManyOnGpu
(
size_t
nElements
,
DataClass
**
gpu_ptrs
,
const
ArgsTypes
*
...
params
)
__global__
void
ConstructManyOnGpu
(
size_t
nElements
,
DataClass
**
gpu_ptrs
,
const
ArgsTypes
*
...
params
)
{
const
size_t
tid
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
...
...
@@ -57,13 +57,24 @@ __global__ void ConstructManyOnGpu(size_t nElements, DataClass ** gpu_ptrs, cons
}
}
template
<
typename
DataClass
>
__global__
void
CopyBBoxesToGpu
(
size_t
nElements
,
DataClass
**
raw_ptrs
,
Precision
*
boxes
)
{
const
size_t
tid
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
for
(
size_t
idx
=
tid
;
idx
<
nElements
;
idx
+=
blockDim
.
x
*
gridDim
.
x
)
{
raw_ptrs
[
idx
]
->
SetBBox
({
boxes
[
6
*
idx
],
boxes
[
6
*
idx
+
1
],
boxes
[
6
*
idx
+
2
]},
{
boxes
[
6
*
idx
+
3
],
boxes
[
6
*
idx
+
4
],
boxes
[
6
*
idx
+
5
]});
}
}
template
<
typename
DataClass
,
typename
...
ArgsTypes
>
void
Generic_CopyToGpu
(
DataClass
*
const
gpu_ptr
,
ArgsTypes
...
params
)
{
ConstructOnGpu
<<<
1
,
1
>>>
(
gpu_ptr
,
params
...);
}
}
//
End cuda
namespace
}
// namespace
cuda
#else
...
...
@@ -74,7 +85,7 @@ Type *AllocateOnDevice();
template
<
typename
DataClass
,
typename
...
ArgsTypes
>
void
Generic_CopyToGpu
(
DataClass
*
const
gpu_ptr
,
ArgsTypes
...
params
);
}
//
End cuda
namespace
}
// namespace
cuda
#endif
...
...
@@ -221,7 +232,9 @@ public:
{
}
~
DevicePtrBase
()
{
/* does not own content per se */
}
~
DevicePtrBase
()
{
/* does not own content per se */
}
void
Malloc
(
unsigned
long
size
)
{
...
...
@@ -422,12 +435,12 @@ namespace CudaInterfaceHelpers {
* \param[in] restToCopy Parameter pack with more arrays to copy (can be empty).
*/
template
<
typename
Arg_t
,
typename
...
Args_t
>
void
allocateAndCopyToGpu
(
std
::
unordered_map
<
const
void
*
,
void
*>
&
cpuToGpuMapping
,
std
::
size_t
nElement
,
const
Arg_t
*
toCopy
,
const
Args_t
*
...
restToCopy
)
void
allocateAndCopyToGpu
(
std
::
unordered_map
<
const
void
*
,
void
*>
&
cpuToGpuMapping
,
std
::
size_t
nElement
,
const
Arg_t
*
toCopy
,
const
Args_t
*
...
restToCopy
)
{
const
auto
nByte
=
sizeof
(
toCopy
[
0
])
*
nElement
;
const
void
*
hostMem
=
toCopy
;
void
*
deviceMem
=
AllocateOnGpu
<
void
*>
(
nByte
);
const
void
*
hostMem
=
toCopy
;
void
*
deviceMem
=
AllocateOnGpu
<
void
*>
(
nByte
);
cpuToGpuMapping
[
hostMem
]
=
deviceMem
;
CopyToGpu
(
hostMem
,
deviceMem
,
nByte
);
...
...
@@ -437,12 +450,12 @@ void allocateAndCopyToGpu(std::unordered_map<const void *, void *> & cpuToGpuMap
}
#else
// C++11 "fold expression" hack. Please remove once VecGeom moves to c++17.
int
expandParameterPack
[]
=
{
0
,
((
void
)
allocateAndCopyToGpu
(
cpuToGpuMapping
,
nElement
,
restToCopy
),
0
)
...};
(
void
)
expandParameterPack
[
0
];
// Make nvcc happy
int
expandParameterPack
[]
=
{
0
,
((
void
)
allocateAndCopyToGpu
(
cpuToGpuMapping
,
nElement
,
restToCopy
),
0
)...};
(
void
)
expandParameterPack
[
0
];
// Make nvcc happy
#endif
}
}
}
// namespace CudaInterfaceHelpers
/*!
* Construct many objects on the GPU, whose addresses and constructor parameters are passed as arrays.
...
...
@@ -453,21 +466,21 @@ void allocateAndCopyToGpu(std::unordered_map<const void *, void *> & cpuToGpuMap
* \param params Array(s) of constructor parameters with one entry for each object.
*/
template
<
class
DataClass
,
class
DevPtr_t
,
typename
...
Args_t
>
void
ConstructManyOnGpu
(
std
::
size_t
nElement
,
const
DevPtr_t
*
gpu_ptrs
,
const
Args_t
*
...
params
)
void
ConstructManyOnGpu
(
std
::
size_t
nElement
,
const
DevPtr_t
*
gpu_ptrs
,
const
Args_t
*
...
params
)
#ifdef VECCORE_CUDA
{
using
namespace
CudaInterfaceHelpers
;
std
::
unordered_map
<
const
void
*
,
void
*>
cpuToGpuMem
;
std
::
vector
<
DataClass
*>
raw_gpu_ptrs
;
std
::
transform
(
gpu_ptrs
,
gpu_ptrs
+
nElement
,
std
::
back_inserter
(
raw_gpu_ptrs
),
[](
const
DevPtr_t
&
ptr
)
{
return
static_cast
<
DataClass
*>
(
ptr
.
GetPtr
());
});
[](
const
DevPtr_t
&
ptr
)
{
return
static_cast
<
DataClass
*>
(
ptr
.
GetPtr
());
});
allocateAndCopyToGpu
(
cpuToGpuMem
,
nElement
,
raw_gpu_ptrs
.
data
(),
params
...);
ConstructManyOnGpu
<<<
128
,
32
>>>
(
raw_gpu_ptrs
.
size
(),
static_cast
<
decltype
(
raw_gpu_ptrs
.
data
())
>
(
cpuToGpuMem
[
raw_gpu_ptrs
.
data
()]),
static_cast
<
decltype
(
params
)
>
(
cpuToGpuMem
[
params
])...);
for
(
const
auto
&
memCpu_memGpu
:
cpuToGpuMem
)
{
for
(
const
auto
&
memCpu_memGpu
:
cpuToGpuMem
)
{
FreeFromGpu
(
memCpu_memGpu
.
second
);
}
}
...
...
@@ -475,9 +488,36 @@ void ConstructManyOnGpu(std::size_t nElement, const DevPtr_t * gpu_ptrs, const A
;
#endif
template
<
class
DataClass
,
class
DevPtr_t
>
void
CopyBBoxesToGpuImpl
(
std
::
size_t
nElement
,
const
DevPtr_t
*
gpu_ptrs
,
Precision
*
boxes_data
)
#ifdef VECCORE_CUDA
{
std
::
unordered_map
<
const
void
*
,
void
*>
cpuToGpuMem
;
std
::
vector
<
DataClass
*>
raw_gpu_ptrs
;
std
::
transform
(
gpu_ptrs
,
gpu_ptrs
+
nElement
,
std
::
back_inserter
(
raw_gpu_ptrs
),
[](
const
DevPtr_t
&
ptr
)
{
return
static_cast
<
DataClass
*>
(
ptr
.
GetPtr
());
});
const
auto
nByteBoxes
=
6
*
nElement
*
sizeof
(
Precision
);
const
auto
nByteVolumes
=
nElement
*
sizeof
(
DataClass
*
);
Precision
*
boxes_data_gpu
=
AllocateOnGpu
<
Precision
>
(
nByteBoxes
);
DataClass
**
raw_gpu_ptrs_gpu
=
AllocateOnGpu
<
DataClass
*>
(
nByteVolumes
);
CopyToGpu
(
boxes_data
,
boxes_data_gpu
,
nByteBoxes
);
CopyToGpu
(
raw_gpu_ptrs
.
data
(),
raw_gpu_ptrs_gpu
,
nByteVolumes
);
cudaDeviceSynchronize
();
CopyBBoxesToGpu
<
DataClass
><<<
128
,
32
>>>
(
raw_gpu_ptrs
.
size
(),
raw_gpu_ptrs_gpu
,
boxes_data_gpu
);
FreeFromGpu
(
boxes_data_gpu
);
FreeFromGpu
(
raw_gpu_ptrs_gpu
);
}
#else
;
#endif
}
// End cxx namespace
}
//
End vecgeom
namespace
}
// namespace
vecgeom
#endif // VECGEOM_ENABLE_CUDA
...
...
VecGeom/volumes/UnplacedVolume.h
View file @
4680b789
...
...
@@ -56,10 +56,10 @@ public:
VECGEOM_FORCE_INLINE
VECCORE_ATT_HOST_DEVICE
void
SetBBox
(
std
::
array
<
Vector3D
<
Precision
>
,
2
>
BBo
x
)
void
SetBBox
(
Vector3D
<
Precision
>
const
&
amin
,
Vector3D
<
Precision
>
const
&
ama
x
)
{
fBBox
[
0
]
=
BBox
[
0
]
;
fBBox
[
1
]
=
BBox
[
1
]
;
fBBox
[
0
]
=
amin
;
fBBox
[
1
]
=
amax
;
}
VECGEOM_FORCE_INLINE
...
...
@@ -72,7 +72,12 @@ public:
VECGEOM_FORCE_INLINE
VECCORE_ATT_HOST_DEVICE
void
ComputeBBox
()
{
Extent
(
fBBox
[
0
],
fBBox
[
1
]);
}
void
ComputeBBox
()
{
#ifndef VECCORE_CUDA_DEVICE_COMPILATION
Extent
(
fBBox
[
0
],
fBBox
[
1
]);
#endif
}
// ---------------- Contains --------------------------------------------------------------------
...
...
@@ -272,7 +277,8 @@ public:
sign
[
1
]
=
invDir
.
y
()
<
0
;
sign
[
2
]
=
invDir
.
z
()
<
0
;
return
BoxImplementation
::
IntersectCachedKernel2
<
Precision
,
Precision
>
(
fBBox
,
point
,
invDir
,
sign
.
x
(),
sign
.
y
(),
sign
.
z
(),
0
,
kInfLength
);
return
BoxImplementation
::
IntersectCachedKernel2
<
Precision
,
Precision
>
(
fBBox
,
point
,
invDir
,
sign
.
x
(),
sign
.
y
(),
sign
.
z
(),
0
,
kInfLength
);
}
/*!
...
...
@@ -354,6 +360,9 @@ public:
return
this
->
CopyToGpu
(
DevicePtr
<
cuda
::
VUnplacedVolume
>
((
void
*
)
gpu_ptr
));
}
static
void
CopyBBoxesToGpu
(
const
std
::
vector
<
VUnplacedVolume
const
*>
&
volumes
,
const
std
::
vector
<
DevicePtr
<
cuda
::
VUnplacedVolume
>>
&
gpu_ptrs
);
#endif
/*!
...
...
source/CudaManager.cpp
View file @
4680b789
...
...
@@ -103,7 +103,7 @@ vecgeom::DevicePtr<const vecgeom::cuda::VPlacedVolume> CudaManager::Synchronize(
{
std
::
vector
<
Transformation3D
const
*>
trafos
;
std
::
vector
<
DevicePtr
<
cuda
::
Transformation3D
>>
devPtrs
;
for
(
Transformation3D
const
*
trafo
:
transformations_
)
{
for
(
Transformation3D
const
*
trafo
:
transformations_
)
{
trafos
.
push_back
(
trafo
);
devPtrs
.
push_back
(
LookupTransformation
(
trafo
));
}
...
...
@@ -147,6 +147,21 @@ vecgeom::DevicePtr<const vecgeom::cuda::VPlacedVolume> CudaManager::Synchronize(
timer
.
Stop
();
if
(
verbose_
>
2
)
std
::
cout
<<
" OK;
\t
TIME NEEDED "
<<
timer
.
Elapsed
()
<<
"s
\n
"
;
if
(
verbose_
>
2
)
std
::
cout
<<
"Copying bounding boxes..."
;
timer
.
Start
();
{
std
::
vector
<
VUnplacedVolume
const
*>
volumes
;
std
::
vector
<
DevicePtr
<
cuda
::
VUnplacedVolume
>>
devPtrs
;
for
(
VUnplacedVolume
const
*
vol
:
unplaced_volumes_
)
{
volumes
.
push_back
(
vol
);
devPtrs
.
push_back
(
LookupUnplaced
(
vol
));
}
VUnplacedVolume
::
CopyBBoxesToGpu
(
volumes
,
devPtrs
);
}
timer
.
Stop
();
if
(
verbose_
>
2
)
std
::
cout
<<
" OK;
\t
TIME NEEDED "
<<
timer
.
Elapsed
()
<<
"s
\n
"
;
synchronized_
=
true
;
world_gpu_
=
LookupPlaced
(
world_
);
...
...
@@ -437,7 +452,7 @@ template <typename Type>
typename
CudaManager
::
GpuAddress
CudaManager
::
Lookup
(
Type
const
*
const
key
)
const
{
const
CpuAddress
cpu_address
=
ToCpuAddress
(
key
);
const
auto
iter
=
memory_map_
.
find
(
cpu_address
);
const
auto
iter
=
memory_map_
.
find
(
cpu_address
);
assert
(
iter
!=
memory_map_
.
end
());
return
iter
->
second
;
}
...
...
@@ -471,7 +486,8 @@ DevicePtr<cuda::Transformation3D> CudaManager::LookupTransformation(Transformati
return
DevicePtr
<
cuda
::
Transformation3D
>
(
Lookup
(
host_ptr
));
}
DevicePtr
<
cuda
::
Vector
<
CudaManager
::
CudaDaughter_t
>>
CudaManager
::
LookupDaughters
(
Vector
<
Daughter
>
*
const
host_ptr
)
const
DevicePtr
<
cuda
::
Vector
<
CudaManager
::
CudaDaughter_t
>>
CudaManager
::
LookupDaughters
(
Vector
<
Daughter
>
*
const
host_ptr
)
const
{
return
DevicePtr
<
cuda
::
Vector
<
CudaManager
::
CudaDaughter_t
>>
(
Lookup
(
host_ptr
));
}
...
...
@@ -500,10 +516,10 @@ void CudaManager::CopyPlacedVolumes() const
};
std
::
unordered_map
<
std
::
type_index
,
TypeInfoForPlaced
>
typesToCopy
;
for
(
VPlacedVolume
const
*
pvol
:
placed_volumes_
)
{
for
(
VPlacedVolume
const
*
pvol
:
placed_volumes_
)
{
const
std
::
type_index
tidx
{
typeid
(
*
pvol
)};
auto
&
typeInfo
=
typesToCopy
[
std
::
type_index
(
typeid
(
*
pvol
))];
auto
&
typeInfo
=
typesToCopy
[
std
::
type_index
(
typeid
(
*
pvol
))];
typeInfo
.
hostVol
.
push_back
(
pvol
);
typeInfo
.
logical
.
push_back
(
LookupLogical
(
pvol
->
GetLogicalVolume
()));
typeInfo
.
trafo
.
push_back
(
LookupTransformation
(
pvol
->
GetTransformation
()));
...
...
@@ -519,9 +535,9 @@ void CudaManager::CopyPlacedVolumes() const
#endif
}
for
(
const
auto
&
type_volInfo
:
typesToCopy
)
{
const
auto
&
volInfo
=
type_volInfo
.
second
;
const
VPlacedVolume
*
const
firstVol
=
volInfo
.
hostVol
.
front
();
for
(
const
auto
&
type_volInfo
:
typesToCopy
)
{
const
auto
&
volInfo
=
type_volInfo
.
second
;
const
VPlacedVolume
*
const
firstVol
=
volInfo
.
hostVol
.
front
();
if
(
verbose_
>
3
)
{
std
::
cout
<<
"
\n\t
"
<<
volInfo
.
hostVol
.
size
()
<<
"
\t
"
<<
type_volInfo
.
first
.
name
();
}
...
...
source/UnplacedVolume.cpp
View file @
4680b789
...
...
@@ -2,6 +2,9 @@
#include "VecGeom/volumes/PlacedVolume.h"
#include "VecGeom/base/SOA3D.h"
#include "VecGeom/volumes/utilities/VolumeUtilities.h"
#ifdef VECGEOM_ENABLE_CUDA
#include "VecGeom/backend/cuda/Interface.h"
#endif
namespace
vecgeom
{
inline
namespace
VECGEOM_IMPL_NAMESPACE
{
...
...
@@ -286,5 +289,41 @@ VPlacedVolume *VUnplacedVolume::PlaceVolume(char const *const label, LogicalVolu
}
#endif
#ifdef VECGEOM_CUDA_INTERFACE
void
VUnplacedVolume
::
CopyBBoxesToGpu
(
const
std
::
vector
<
VUnplacedVolume
const
*>
&
volumes
,
const
std
::
vector
<
DevicePtr
<
cuda
::
VUnplacedVolume
>>
&
gpu_ptrs
)
{
assert
(
volumes
.
size
()
==
gpu_ptrs
.
size
()
&&
"Unequal CPU/GPU vectors for copying bounding boxes."
);
// Copy boxes data in a contiguous array, box icrt starting at index 6*icrt
std
::
vector
<
Precision
>
boxesData
(
6
*
gpu_ptrs
.
size
());
int
icrt
=
0
;
for
(
auto
vol
:
volumes
)
{
Vector3D
<
Precision
>
amin
,
amax
;
vol
->
GetBBox
(
amin
,
amax
);
assert
((
amax
-
amin
).
Mag
()
>
0
);
for
(
unsigned
int
i
=
0
;
i
<
3
;
++
i
)
{
boxesData
[
6
*
icrt
+
i
]
=
amin
[
i
];
boxesData
[
6
*
icrt
+
i
+
3
]
=
amax
[
i
];
}
icrt
++
;
}
// Dispatch to the GPU interface helper
CopyBBoxesToGpuImpl
<
cuda
::
VUnplacedVolume
,
DevicePtr
<
cuda
::
VUnplacedVolume
>>
(
gpu_ptrs
.
size
(),
gpu_ptrs
.
data
(),
boxesData
.
data
());
}
#endif
}
// namespace VECGEOM_IMPL_NAMESPACE
#ifdef VECCORE_CUDA
namespace
cxx
{
template
void
CopyBBoxesToGpuImpl
<
cuda
::
VUnplacedVolume
,
DevicePtr
<
cuda
::
VUnplacedVolume
>
>
(
std
::
size_t
,
DevicePtr
<
cuda
::
VUnplacedVolume
>
const
*
,
cuda
::
Precision
*
);
}
// namespace cxx
#endif // VECCORE_CUDA
}
// namespace vecgeom
test/cuda/GeometryTest.cpp
View file @
4680b789
...
...
@@ -4,7 +4,8 @@
* This test validates a given geometry on the GPU.
* - A geometry is read from a GDML file passed as argument.
* - This geometry is constructed both for CPU and GPU.
* - It is subsequently visited on the GPU, while information like volume IDs and transformations are recorded in a large array.
* - It is subsequently visited on the GPU, while information like volume IDs and transformations are recorded in a
* large array.
* - This array is copied to the host, and the host geometry is visited, comparing the data coming from the GPU.
*
* Which data are recorded and how they are compared is completely controlled by the struct GeometryInfo.
...
...
@@ -24,42 +25,50 @@
using
namespace
vecgeom
;
void
visitVolumes
(
const
VPlacedVolume
*
volume
,
GeometryInfo
*
geoData
,
std
::
size_t
&
volCounter
,
const
std
::
size_t
nGeoData
,
unsigned
int
depth
)
{
void
visitVolumes
(
const
VPlacedVolume
*
volume
,
GeometryInfo
*
geoData
,
std
::
size_t
&
volCounter
,
const
std
::
size_t
nGeoData
,
unsigned
int
depth
)
{
assert
(
volCounter
<
nGeoData
);
geoData
[
volCounter
++
]
=
GeometryInfo
{
depth
,
*
volume
};
for
(
const
VPlacedVolume
*
daughter
:
volume
->
GetDaughters
())
{
for
(
const
VPlacedVolume
*
daughter
:
volume
->
GetDaughters
())
{
visitVolumes
(
daughter
,
geoData
,
volCounter
,
nGeoData
,
depth
+
1
);
}
}
void
compareGeometries
(
const
cxx
::
VPlacedVolume
*
hostVolume
,
std
::
size_t
&
volumeCounter
,
const
std
::
vector
<
GeometryInfo
>
&
deviceGeometry
,
unsigned
int
depth
)
{
void
compareGeometries
(
const
cxx
::
VPlacedVolume
*
hostVolume
,
std
::
size_t
&
volumeCounter
,
const
std
::
vector
<
GeometryInfo
>
&
deviceGeometry
,
unsigned
int
depth
)
{
if
(
volumeCounter
>=
deviceGeometry
.
size
())
{
errx
(
3
,
"No device volume corresponds to volume %lu"
,
volumeCounter
);
}
GeometryInfo
info
{
depth
,
*
hostVolume
};
if
(
!
(
info
==
deviceGeometry
[
volumeCounter
])
)
{
if
(
!
(
info
==
deviceGeometry
[
volumeCounter
]))
{
printf
(
"CPU transformation:
\n
"
);
info
.
trans
.
Print
();
printf
(
"
\n
GPU transformation
\n
"
);
deviceGeometry
[
volumeCounter
].
trans
.
Print
();
printf
(
"
\n
"
);
errx
(
4
,
"Volume #%lu (id=%d label=%s logicalId=%d) differs from GPU volume (id=%d logicalId=%d)
\n
"
,
volumeCounter
,
hostVolume
->
id
(),
hostVolume
->
GetLabel
().
c_str
(),
hostVolume
->
GetLogicalVolume
()
->
id
(),
printf
(
"CPU amin: (%g, %g, %g) amax: (%g, %g, %g)
\n
"
,
info
.
amin
[
0
],
info
.
amin
[
1
],
info
.
amin
[
2
],
info
.
amax
[
0
],
info
.
amax
[
1
],
info
.
amax
[
2
]);
printf
(
"GPU amin: (%g, %g, %g) amax: (%g, %g, %g)
\n
"
,
deviceGeometry
[
volumeCounter
].
amin
[
0
],
deviceGeometry
[
volumeCounter
].
amin
[
1
],
deviceGeometry
[
volumeCounter
].
amin
[
2
],
deviceGeometry
[
volumeCounter
].
amax
[
0
],
deviceGeometry
[
volumeCounter
].
amax
[
1
],
deviceGeometry
[
volumeCounter
].
amax
[
2
]);
errx
(
4
,
"Volume #%lu (id=%d label=%s logicalId=%d) differs from GPU volume (id=%d logicalId=%d)
\n
"
,
volumeCounter
,
hostVolume
->
id
(),
hostVolume
->
GetLabel
().
c_str
(),
hostVolume
->
GetLogicalVolume
()
->
id
(),
deviceGeometry
[
volumeCounter
].
id
,
deviceGeometry
[
volumeCounter
].
logicalId
);
}
volumeCounter
++
;
for
(
const
cxx
::
VPlacedVolume
*
daughter
:
hostVolume
->
GetDaughters
())
{
for
(
const
cxx
::
VPlacedVolume
*
daughter
:
hostVolume
->
GetDaughters
())
{
compareGeometries
(
daughter
,
volumeCounter
,
deviceGeometry
,
depth
+
1
);
}
}
int
main
(
int
argc
,
char
**
argv
)
{
int
main
(
int
argc
,
char
**
argv
)
{
#ifdef VECGEOM_GDML
bool
verbose
=
true
;
bool
validate
=
true
;
...
...
test/cuda/GeometryTest.cu
View file @
4680b789
...
...
@@ -8,9 +8,9 @@
__managed__
std
::
size_t
g_volumesVisited
;
__managed__
bool
g_problemDuringVisit
;
__device__
void
visitVolumes
(
const
vecgeom
::
cuda
::
VPlacedVolume
*
volume
,
GeometryInfo
*
geoData
,
std
::
size_t
&
volCounter
,
const
std
::
size_t
nGeoData
,
unsigned
int
depth
)
{
__device__
void
visitVolumes
(
const
vecgeom
::
cuda
::
VPlacedVolume
*
volume
,
GeometryInfo
*
geoData
,
std
::
size_t
&
volCounter
,
const
std
::
size_t
nGeoData
,
unsigned
int
depth
)
{
if
(
volCounter
>=
nGeoData
)
{
g_problemDuringVisit
=
true
;
printf
(
"Sorry, hard-coded buffer size exhausted after visiting %lu volumes. Please increase.
\n
"
,
volCounter
);
...
...
@@ -18,21 +18,22 @@ void visitVolumes(const vecgeom::cuda::VPlacedVolume * volume, GeometryInfo * ge
}
geoData
[
volCounter
++
]
=
GeometryInfo
{
depth
,
*
volume
};
for
(
const
vecgeom
::
cuda
::
VPlacedVolume
*
daughter
:
volume
->
GetDaughters
())
{
for
(
const
vecgeom
::
cuda
::
VPlacedVolume
*
daughter
:
volume
->
GetDaughters
())
{
visitVolumes
(
daughter
,
geoData
,
volCounter
,
nGeoData
,
depth
+
1
);
if
(
g_problemDuringVisit
)
break
;
}
}
__global__
void
kernel_visitDeviceGeometry
(
const
vecgeom
::
cuda
::
VPlacedVolume
*
volume
,
GeometryInfo
*
g
eoData
,
const
std
::
size_t
nGeoData
)
{
g_volumesVisited
=
0
;
__global__
void
kernel_visitDeviceGeometry
(
const
vecgeom
::
cuda
::
VPlacedVolume
*
volume
,
GeometryInfo
*
geoData
,
const
std
::
size_t
nG
eoData
)
{
g_volumesVisited
=
0
;
g_problemDuringVisit
=
false
;
visitVolumes
(
volume
,
geoData
,
g_volumesVisited
,
nGeoData
,
0
);
}
std
::
vector
<
GeometryInfo
>
visitDeviceGeometry
(
const
vecgeom
::
cuda
::
VPlacedVolume
*
volume
)
{
std
::
vector
<
GeometryInfo
>
visitDeviceGeometry
(
const
vecgeom
::
cuda
::
VPlacedVolume
*
volume
)
{
auto
err
=
cudaDeviceSynchronize
();
if
(
err
!=
cudaSuccess
)
{
errx
(
2
,
"Cuda error before visiting device geometry: '%s'"
,
cudaGetErrorString
(
err
));
...
...
@@ -40,15 +41,15 @@ std::vector<GeometryInfo> visitDeviceGeometry(const vecgeom::cuda::VPlacedVolume
constexpr
std
::
size_t
maxElem
=
100000
;
GeometryInfo
*
geoDataGPU
;
GeometryInfo
*
geoDataGPU
;
cudaMalloc
(
&
geoDataGPU
,
maxElem
*
sizeof
(
GeometryInfo
));
kernel_visitDeviceGeometry
<<<
1
,
1
>>>
(
volume
,
geoDataGPU
,
maxElem
);
kernel_visitDeviceGeometry
<<<
1
,
1
>>>
(
volume
,
geoDataGPU
,
maxElem
);
err
=
cudaDeviceSynchronize
();
if
(
err
!=
cudaSuccess
)
{
errx
(
2
,
"Visiting device geometry failed with '%s'"
,
cudaGetErrorString
(
err
));
}
else
if
(
g_problemDuringVisit
)
{
errx
(
2
,
"Visiting device geometry
failed
."
);
errx
(
2
,
"Visiting device geometry
reached depth limit
."
);
}
std
::
vector
<
GeometryInfo
>
geoDataCPU
(
maxElem
);
...
...
@@ -65,5 +66,3 @@ std::vector<GeometryInfo> visitDeviceGeometry(const vecgeom::cuda::VPlacedVolume
return
geoDataCPU
;
}
test/cuda/GeometryTest.h
View file @
4680b789
...
...
@@ -9,34 +9,35 @@
namespace
vecgeom
{
VECGEOM_DEVICE_FORWARD_DECLARE
(
class
VPlacedVolume
;);
VECGEOM_HOST_FORWARD_DECLARE
(
class
VPlacedVolume
;);
}
}
// namespace vecgeom
struct
GeometryInfo
{
unsigned
int
depth
=
0
;
decltype
(
std
::
declval
<
vecgeom
::
VPlacedVolume
>
().
id
())
id
=
0
;
unsigned
int
depth
=
0
;
decltype
(
std
::
declval
<
vecgeom
::
VPlacedVolume
>
().
id
())
id
=
0
;
decltype
(
std
::
declval
<
vecgeom
::
VPlacedVolume
>
().
GetChildId
())
childId
=
0
;
decltype
(
std
::
declval
<
vecgeom
::
VPlacedVolume
>
().
GetCopyNo
())
copyNo
=
0
;
decltype
(
std
::
declval
<
vecgeom
::
LogicalVolume
>
().
id
())
logicalId
=
0
;
decltype
(
std
::
declval
<
vecgeom
::
VPlacedVolume
>
().
GetCopyNo
())
copyNo
=
0
;
decltype
(
std
::
declval
<
vecgeom
::
LogicalVolume
>
().
id
())
logicalId
=
0
;
vecgeom
::
Transformation3D
trans
;
vecgeom
::
Vector3D
<
vecgeom
::
Precision
>
amin
;
vecgeom
::
Vector3D
<
vecgeom
::
Precision
>
amax
;
GeometryInfo
()
=
default
;
template
<
typename
Vol_t
>
template
<
typename
Vol_t
>
VECCORE_ATT_HOST_DEVICE
GeometryInfo
(
unsigned
int
theDepth
,
Vol_t
&
vol
)
:
depth
(
theDepth
),
id
(
vol
.
id
()),
childId
(
vol
.
GetChildId
()),
copyNo
(
vol
.
GetCopyNo
()),
logicalId
(
vol
.
GetLogicalVolume
()
->
id
()),
trans
{
*
vol
.
GetTransformation
()}
{
}
bool
operator
==
(
const
GeometryInfo
&
rhs
)
{
return
depth
==
rhs
.
depth
&&
id
==
rhs
.
id
&&
childId
==
rhs
.
childId
&&
copyNo
==
rhs
.
copyNo
&&
logicalId
==
rhs
.
logicalId
&&
trans
==
rhs
.
trans
;
GeometryInfo
(
unsigned
int
theDepth
,
Vol_t
&
vol
)
:
depth
(
theDepth
),
id
(
vol
.
id
()),
childId
(
vol
.
GetChildId
()),
copyNo
(
vol
.
GetCopyNo
()),
logicalId
(
vol
.
GetLogicalVolume
()
->
id
()),
trans
{
*
vol
.
GetTransformation
()}
{
vol
.
GetUnplacedVolume
()
->
GetBBox
(
amin
,
amax
);
assert
((
amax
-
amin
).
Mag
()
>
0
&&
"Bounding box size must be positive"
);
}
bool
operator
==
(
const
GeometryInfo
&
rhs
)
{
return
depth
==
rhs
.
depth
&&
id
==
rhs
.
id
&&
childId
==
rhs
.
childId
&&
copyNo
==
rhs
.
copyNo
&&
logicalId
==
rhs
.
logicalId
&&
trans
==
rhs
.
trans
&&
amin
==
rhs
.
amin
&&
amax
==
rhs
.
amax
;
}
};
std
::
vector
<
GeometryInfo
>
visitDeviceGeometry
(
const
vecgeom
::
cuda
::
VPlacedVolume
*
volume
);
\ No newline at end of file
std
::
vector
<
GeometryInfo
>
visitDeviceGeometry
(
const
vecgeom
::
cuda
::
VPlacedVolume
*
volume
);
\ No newline at end of file
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment