Commit ff2e78ff authored by Andrei Gheata
Browse files

Fixes for navigation index table in single precision mode.

parent c8dd8b94
......@@ -426,8 +426,8 @@ public:
size_t DeviceSizeOf() const { return DevicePtr<cuda::Transformation3D>::SizeOf(); }
DevicePtr<cuda::Transformation3D> CopyToGpu() const;
DevicePtr<cuda::Transformation3D> CopyToGpu(DevicePtr<cuda::Transformation3D> const gpu_ptr) const;
static void CopyManyToGpu(const std::vector<Transformation3D const *>& trafos,
const std::vector<DevicePtr<cuda::Transformation3D>>& gpu_ptrs);
static void CopyManyToGpu(const std::vector<Transformation3D const *> &trafos,
const std::vector<DevicePtr<cuda::Transformation3D>> &gpu_ptrs);
#endif
#ifdef VECGEOM_ROOT
......@@ -611,7 +611,7 @@ void Transformation3D::DoRotation_new(Vector3D<InputType> const &master, Vector3
}
// General case
local = Vector3D<double>(); // reset to zero -- any better way to do this???
local = Vector3D<InputType>(); // reset to zero -- any better way to do this???
if (code & 0x001) {
local[0] += master[0] * fRotation[0];
}
......
......@@ -21,7 +21,7 @@ NavIndex_t BuildNavIndexVisitor::apply(NavStatePath *state, int level, NavIndex_
return NavIndexTable::Instance()->ValidateState(state);
}
// Size in bytes of the current node data
size_t current_size = (3 + nd + ((nd + 1) & 1)) * sizeof(unsigned int) + int(cacheTrans) * 12 * sizeof(double);
size_t current_size = (3 + nd + ((nd + 1) & 1)) * sizeof(unsigned int) + int(cacheTrans) * 12 * sizeof(Precision);
if (fDoCount) {
fTableSize += current_size;
return 0;
......@@ -66,14 +66,15 @@ NavIndex_t BuildNavIndexVisitor::apply(NavStatePath *state, int level, NavIndex_
*content_hasm = 0x04 + 0x02 * (unsigned short)mat.HasTranslation() + (unsigned short)mat.HasRotation();
// Write the transformation elements
auto content_mat = (double *)(&fNavInd[fCurrent]);
auto content_mat = (Precision *)(&fNavInd[fCurrent]);
for (auto i = 0; i < 3; ++i)
content_mat[i] = mat.Translation(i);
for (auto i = 0; i < 9; ++i)
content_mat[i + 3] = mat.Rotation(i);
// Set new value for fCurrent
fCurrent += 24;
fCurrent += 12 * sizeof(Precision) / sizeof(NavIndex_t);
assert((fCurrent - new_mother) * sizeof(NavIndex_t) == current_size);
return new_mother;
}
......
......@@ -115,12 +115,13 @@ int visitAllPlacedVolumesPassNavIndex(VPlacedVolume const *currentvolume, Visito
visitor->apply(state, nav_ind);
auto ierr = visitor->GetError();
if (ierr) {
printf("=== EEE === TestNavIndex: %s\n", errcodes[ierr]);
printf("=== EEE === TestNavIndex: %s\n", errcodes[ierr - 1]);
return ierr;
}
for (auto daughter : currentvolume->GetDaughters()) {
auto nav_ind_d = NavStateIndex::PushImpl(nav_ind, daughter);
visitAllPlacedVolumesPassNavIndex(daughter, visitor, state, nav_ind_d);
ierr = visitAllPlacedVolumesPassNavIndex(daughter, visitor, state, nav_ind_d);
if (ierr) return ierr;
}
state->Pop();
}
......
......@@ -29,8 +29,8 @@ namespace visitorcuda {
class GlobalToLocalVisitor {
private:
int fError = 0; ///< error code
int fNiter = 0; ///< number of iterations
int fError = 0; ///< error code
int fNiter = 0; ///< number of iterations
public:
VECCORE_ATT_HOST_DEVICE
GlobalToLocalVisitor() {}
......@@ -115,21 +115,21 @@ int visitAllPlacedVolumesPassNavIndex(VPlacedVolume const *currentvolume, Visito
"level mismatch",
"navigation index inconsistency for Push/Pop",
"number of daughters mismatch",
"transformation matrix mismatch"
};
constexpr int maxiter = 100000; // limit the maximum number of iterations (slow on 1 GPU thread)
"transformation matrix mismatch"};
constexpr int maxiter = 100000; // limit the maximum number of iterations (slow on 1 GPU thread)
if (currentvolume != NULL) {
state->Push(currentvolume);
visitor->apply(state, nav_ind);
auto ierr = visitor->GetError();
if (ierr) {
printf("=== EEE === TestNavIndex: %s\n", errcodes[ierr]);
printf("=== EEE === TestNavIndex: %s\n", errcodes[ierr - 1]);
return ierr;
}
if (visitor->GetNiter() > maxiter) return 0;
for (auto daughter : currentvolume->GetDaughters()) {
auto nav_ind_d = NavStateIndex::PushImpl(nav_ind, daughter);
visitAllPlacedVolumesPassNavIndex(daughter, visitor, state, nav_ind_d);
ierr = visitAllPlacedVolumesPassNavIndex(daughter, visitor, state, nav_ind_d);
if (ierr > 0) return ierr;
if (visitor->GetNiter() > maxiter) return 0;
}
state->Pop();
......@@ -139,11 +139,11 @@ int visitAllPlacedVolumesPassNavIndex(VPlacedVolume const *currentvolume, Visito
} // namespace visitorcuda
__global__
void TestNavIndexGPUKernel(vecgeom::cuda::VPlacedVolume const* const gpu_world, vecgeom::cuda::NavStatePath * const state, int *ierr)
__global__ void TestNavIndexGPUKernel(vecgeom::cuda::VPlacedVolume const *const gpu_world,
vecgeom::cuda::NavStatePath *const state, int *ierr)
{
using namespace visitorcuda;
state->Clear();
GlobalToLocalVisitor visitor;
......@@ -152,14 +152,14 @@ void TestNavIndexGPUKernel(vecgeom::cuda::VPlacedVolume const* const gpu_world,
*ierr = visitAllPlacedVolumesPassNavIndex(gpu_world, &visitor, state, nav_ind_top);
}
int TestNavIndexGPU(vecgeom::cxx::VPlacedVolume const* const world, int maxdepth)
int TestNavIndexGPU(vecgeom::cxx::VPlacedVolume const *const world, int maxdepth)
{
// Load and synchronize the geometry on the GPU
size_t statesize = NavigationState::SizeOfInstance(maxdepth);
vecgeom::cxx::CudaManager::Instance().LoadGeometry(world);
vecgeom::cxx::CudaManager::Instance().Synchronize();
auto gpu_world = vecgeom::cxx::CudaManager::Instance().world_gpu();
assert(gpu_world && "GPU world volume is a null pointer");
......@@ -182,9 +182,7 @@ int TestNavIndexGPU(vecgeom::cxx::VPlacedVolume const* const world, int maxdepth
checkCudaErrors(cudaFree(input_buffer));
auto tvalidate = timer.Stop();
if (!ierr)
std::cout << "=== Info navigation table validation on GPU took: " << tvalidate << " sec.\n";
if (!ierr) std::cout << "=== Info navigation table validation on GPU took: " << tvalidate << " sec.\n";
return ierr;
}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment