diff --git a/CMakeLists.txt b/CMakeLists.txt index 4d2d801dd1e45dd0531033e83bbde5866de2fdbe..70ee1f0b15442ed13b12f4d536a98edc71fb3f7f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -462,14 +462,15 @@ add_subdirectory (xsec) add_subdirectory (vecprot_v2) add_subdirectory (examples) if (NOT MIC) -add_subdirectory (tabxsec) add_subdirectory (magneticfield) +if (NOT CUDA) +add_subdirectory (tabxsec) add_subdirectory (test-small) add_subdirectory (test-complex) - add_subdirectory (cmstrack) add_subdirectory (Nudy) endif() +endif() if(USE_VECPHYS) add_subdirectory (vectphysproc) diff --git a/xsec/CMakeLists.txt b/xsec/CMakeLists.txt index d150819a021dcc46ef0e4902ccdbe3cb5cb7cebf..d499e822bc57b450b9c76fcc5d934c1218f68f02 100644 --- a/xsec/CMakeLists.txt +++ b/xsec/CMakeLists.txt @@ -86,14 +86,14 @@ if (CUDA) set(SRC_CPP_RELATIVE + xsec/src/TEFstate.cxx xsec/src/TPartIndex.cxx xsec/src/TPXsec.cxx xsec/src/TEXsec.cxx xsec/src/TPDecay.cxx xsec/src/TFinState.cxx xsec/src/TPFstate.cxx - xsec/src/TEFstate.cxx -# xsec/src/TMXsec.cxx + # xsec/src/TMXsec.cxx # xsec/src/TTabPhysMgr.cxx # xsec/src/TTabPhysProcess.cxx ) @@ -121,21 +121,12 @@ if (CUDA) endforeach() -#ROOT_GENERATE_DICTIONARY(xseccudaDict ${headers} MODULE libxseccuda LINKDEF inc/cudaLinkDef.h ) - -#set(SRC_CUDA ${SRC_CUDA} xseccudaDict.cxx ) cuda_add_library(xseccuda ${SRC_CUDA} - OPTIONS ${CUDA_ARCH} + OPTIONS ${CUDA_ARCH} "-DGEANT_NVCC" ) -#target_link_libraries(xseccuda ${VECGEOM_LIBRARIES} ${VC_LIBRARIES} ${HEPMC_LIBS}) -#add_dependencies(xseccuda Xsec) -#get_property(dirs DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY INCLUDE_DIRECTORIES) -#foreach(dir ${dirs}) -#message (STATUS "dir='${dirs}'") -#endforeach() add_custom_target(testserial ALL DEPENDS xseccuda) message (STATUS "vecgeom defs ${CUDA_ARCH}") @@ -143,12 +134,11 @@ ADD_CUSTOM_COMMAND( TARGET testserial PRE_BUILD COMMAND ${CMAKE_COMMAND} -E echo "Separate Compilation for testserialReadCuda" - COMMAND ${CUDA_HOST_COMPILER} -c --std=c++11 ${VECGEOM_DEFINITIONS} ${GEANTV_COMPILE_DEFINITIONS} ${CMAKE_SOURCE_DIR}/xsec/testserialReadCuda.cpp -o testserialReadCuda.cpp.o -I${VECGEOM_INSTALL_DIR}/include -I${CUDA_TOOLKIT_ROOT_DIR}/include + COMMAND ${CUDA_HOST_COMPILER} -c -v --std=c++11 -DUSE_VECGEOM_NAVIGATOR ${VECGEOM_DEFINITIONS} ${CMAKE_SOURCE_DIR}/xsec/testserialReadCuda.cpp -o testserialReadCuda.cpp.o -I${VECGEOM_INSTALL_DIR}/include -I${CUDA_TOOLKIT_ROOT_DIR}/include -I${CMAKE_SOURCE_DIR}/include -I${CMAKE_SOURCE_DIR}/xsec/inc -I${CMAKE_SOURCE_DIR}/base/inc -I${VECGEOM_INSTALL_DIR}/include/ COMMAND ${CMAKE_COMMAND} -E echo "Separate Compilation for testserialReadCuda nvcc ${GEANTV_COMPILE_DEFINITIONS}" - COMMAND ${CUDA_NVCC_EXECUTABLE} ${CUDA_NVCC_FLAGS} ${VECGEOM_DEFINITIONS} -DUSE_VECGEOM_NAVIGATOR -dc -arch=sm_30 ${CMAKE_SOURCE_DIR}/xsec/testserialReadCuda.cu -o testserial.o -I${VECGEOM_INSTALL_DIR}/include -I${CMAKE_SOURCE_DIR}/include -I${CMAKE_SOURCE_DIR}/xsec/inc -I${CMAKE_SOURCE_DIR}/base/inc + COMMAND ${CUDA_NVCC_EXECUTABLE} ${CUDA_NVCC_FLAGS} ${VECGEOM_DEFINITIONS} -DUSE_VECGEOM_NAVIGATOR --debug --device-debug -lineinfo -DUSE_VECGEOM_NAVIGATOR -dc -arch=sm_30 ${CMAKE_SOURCE_DIR}/xsec/testserialReadCuda.cu -o testserial.o -I${VECGEOM_INSTALL_DIR}/include -I${CMAKE_SOURCE_DIR}/include -I${CMAKE_SOURCE_DIR}/xsec/inc -I${CMAKE_SOURCE_DIR}/base/inc COMMAND ${CMAKE_COMMAND} -E echo "Separate Compilation for testserialReadCuda linking" - COMMAND ${CUDA_NVCC_EXECUTABLE} ${CUDA_NVCC_FLAGS} --verbose -arch=sm_30 testserialReadCuda.cpp.o testserial.o -L${CMAKE_LIBRARY_OUTPUT_DIRECTORY} -lxseccuda -L${VECGEOM_INSTALL_DIR}/lib/ -lrawvecgeomcuda -lvecgeom -o runtest - + COMMAND ${CUDA_NVCC_EXECUTABLE} --verbose -arch=sm_30 -lineinfo testserialReadCuda.cpp.o testserial.o -L${CMAKE_LIBRARY_OUTPUT_DIRECTORY} -lxseccuda -L${VECGEOM_INSTALL_DIR}/lib/ -lrawvecgeomcuda -lvecgeom ${Vc_LIBRARIES} ${ROOT_LIBRARIES} -o runtest ) endif() diff --git a/xsec/inc/TEFstate.h b/xsec/inc/TEFstate.h index 5f83e3186c42f012afde2f3bc2dbb0cfd8481f86..defc797f484ee0474f272cd05e8118030ac013a1 100644 --- a/xsec/inc/TEFstate.h +++ b/xsec/inc/TEFstate.h @@ -15,10 +15,13 @@ // // ////////////////////////////////////////////////////////////////////////// #include "TPFstate.h" +#include "Geant/Config.h" #ifndef GEANT_NVCC +#ifdef USE_ROOT class TFile; #endif +#endif class TFinState; class TPDecay; diff --git a/xsec/testserialReadCuda.cpp b/xsec/testserialReadCuda.cpp index 59faa0a46138c7c4a8b33633a6a47eb95b4e5c37..ace3738fc2003b6299060bbf419226e416895fd3 100644 --- a/xsec/testserialReadCuda.cpp +++ b/xsec/testserialReadCuda.cpp @@ -1,13 +1,11 @@ -/* -#include "TEXsec.h" -#include "TEFstate.h" -#include "TPDecay.h" +/* #include "TTabPhysMgr.h" #include "GeantPropagator.h" #ifdef USE_ROOT #include "TGeoManager.h" #endif +*/ #ifdef USE_VECGEOM_NAVIGATOR #include "base/RNG.h" @@ -19,13 +17,40 @@ using vecgeom::RNG; #else #define UNIFORM() ((double)rand())/RAND_MAX #endif -*/ #include <iostream> #include <fstream> #include "backend/cuda/Interface.h" -void launchExpandPhysicsOnDevice(vecgeom::DevicePtr<char>&, int nBlocks, int nThreads); - +//#include "TEXsec.h" +//#include "TEFstate.h" +//#include "TPDecay.h" +//void launchExpandPhysicsOnDevice(vecgeom::DevicePtr<char>&, int nBlocks, int nThreads, int nrep, vecgeom::DevicePtr<double> devIPart, vecgeom::DevicePtr<double> devIEnergy); +void launchExpandPhysicsOnDevice(vecgeom::DevicePtr<char>&, int nBlocks, int nThreads, double* iSampled,int* devIPart, float* devIEnergy); +/* +void expandPhysicsLocal(char *buf) { + std::cout << "Rebuilding TPartIndex store" << std::endl; + TPartIndex::I()->RebuildClass(buf); + int sizet = TPartIndex::I()->SizeOf(); + std::cout << "Number of bytes for TPartIndex " << sizet << std::endl; + buf += sizet; + std::cout << "Rebuilding x-sec store" << std::endl; + TEXsec::RebuildStore(buf); + int sizex = TEXsec::SizeOfStore(); + std::cout << "Number of bytes for x-sec " << sizex << std::endl; + buf += sizex; + std::cout << "Rebuilding decay store" << std::endl; + TPDecay *dec = (TPDecay *) buf; + dec->RebuildClass(); + TEFstate::SetDecayTable(dec); + int sized = dec->SizeOf(); + std::cout << "Number of bytes for decay " << sized << std::endl; + buf += sized; + std::cout << "Rebuilding final state store" << std::endl; + TEFstate::RebuildStore(buf); + int sizef = TEFstate::SizeOfStore(); + std::cout << "Number of bytes for final state -- HOST --" << sizef << std::endl; +} +*/ int main() { char *hostBuf=nullptr; @@ -51,11 +76,114 @@ int main() printf("Total size of store %d\n", totsize); - launchExpandPhysicsOnDevice(devBuf, 1, 1); + const unsigned int nrep = 50; + double *iSampled; + float *iEnergy; + int *iPart; + cudaMallocHost(&iSampled, nrep*sizeof(double)); + if (cudaGetLastError() != cudaSuccess) { + printf(" ERROR allocating iSample on Host: %s \n",cudaGetErrorString(cudaGetLastError())); + return 0; + } + cudaMallocHost(&iEnergy, nrep*sizeof(float)); + if (cudaGetLastError() != cudaSuccess) { + printf(" ERROR allocating iEnergy on Host: %s \n",cudaGetErrorString(cudaGetLastError())); + return 0; + } + cudaMallocHost(&iPart, nrep*sizeof(int)); + if (cudaGetLastError() != cudaSuccess) { + printf(" ERROR allocating iPart on Host: %s \n",cudaGetErrorString(cudaGetLastError())); + return 0; + } + + printf("Number of repetitions %d\n", nrep); + + + for(auto irep=0; irep<nrep; irep++) { + iSampled[irep] = (double) UNIFORM(); + } +/* + vecgeom::DevicePtr<double> devIPart; + vecgeom::DevicePtr<double> devIEnergy; + + devIPart.Allocate(nrep); + if (cudaGetLastError() != cudaSuccess) { + printf(" ERROR ALLOC buffer\n"); + return 0; + } + + devIEnergy.Allocate(nrep); + if (cudaGetLastError() != cudaSuccess) { + printf(" ERROR ALLOC buffer\n"); + return 0; + } - /* - expandPhysicsKernel<<<>>>(devBuf); + + devIPart.ToDevice(iSampled,nrep); + */ + double *devISampled; + int *devIPart; + float *devIEnergy; + cudaMalloc((void**) &devISampled, nrep*sizeof(double)); + if (cudaGetLastError() != cudaSuccess) { + printf(" ERROR allocating devIPart to Device: %s \n",cudaGetErrorString(cudaGetLastError())); + return 0; + } + cudaMalloc((void**) &devIPart, nrep*sizeof(int)); + if (cudaGetLastError() != cudaSuccess) { + printf(" ERROR allocating devIPart to Device: %s \n",cudaGetErrorString(cudaGetLastError())); + return 0; + } + cudaMalloc((void**) &devIEnergy, nrep*sizeof(float)); + if (cudaGetLastError() != cudaSuccess) { + printf(" ERROR allocating devIEnergy to Device: %s\n",cudaGetErrorString(cudaGetLastError())); + return 0; + } + + cudaMemcpy(devISampled,iSampled, nrep*sizeof(double),cudaMemcpyHostToDevice); + if (cudaGetLastError() != cudaSuccess) { + printf(" ERROR copying iSampled to devIPart on Device:%s \n", cudaGetErrorString(cudaGetLastError())); + return 0; + } + + launchExpandPhysicsOnDevice(devBuf, 1, 1,devISampled, devIPart,devIEnergy); + + + +cudaThreadSynchronize(); + cudaMemcpy(iSampled,devISampled, nrep*sizeof(double),cudaMemcpyDeviceToHost); + cudaError_t error=cudaGetLastError(); + if (error != cudaSuccess) { + printf(" ERROR copy iSampled from Device: %s\n", cudaGetErrorString(error)); + return 0; + } + + cudaMemcpy(iPart,devIPart, nrep*sizeof(int),cudaMemcpyDeviceToHost); + error=cudaGetLastError(); + if (error != cudaSuccess) { + printf(" ERROR copy iPart from Device: %s\n", cudaGetErrorString(error)); + return 0; + } + cudaMemcpy(iEnergy,devIEnergy, nrep*sizeof(float),cudaMemcpyDeviceToHost); + if (cudaGetLastError() != cudaSuccess) { + printf(" ERROR copy iEnergy from Device\n"); + return 0; + } + + + +/* + CudaCopyFromDevice(iEnergy,devIEnergy, devIEnergy.SizeOf()); + CudaCopyFromDevice(iPart,devIPart, devIPart.SizeOf()); +*/ + + printf("============ BACK TO THE HOST ==============\n"); + for(auto irep=0; irep<nrep; irep++) + printf(" energy for iPart %d is %f\n",iPart[irep], iEnergy[irep]); + +/* + expandPhysicsLocal(hostBuf); const char *fxsec = "/dev/null"; const char *ffins = "/dev/null"; #ifdef USE_ROOT @@ -74,14 +202,17 @@ int main() srand(12345); #endif #endif +*/ std::ofstream fftest("xphysR.txt"); - for(auto iel=0; iel<TEXsec::NLdElems(); ++iel) { - for(auto irep=0; irep<nrep; ++irep) { - int ipart = UNIFORM() * TPartIndex::I()->NPartReac(); - int ireac = UNIFORM() * FNPROC; - float en = UNIFORM() * (TPartIndex::I()->Emax() - TPartIndex::I()->Emin()) +// for(auto iel=0; iel<TEXsec::NLdElems(); ++iel) { + int iel =1; + for(auto irep=0; irep<nrep; ++irep) { +/* + int ipart = iPart[irep] * TPartIndex::I()->NPartReac(); + int ireac = iPart[irep] * FNPROC; + float en = iPart[irep] * (TPartIndex::I()->Emax() - TPartIndex::I()->Emin()) + TPartIndex::I()->Emin(); //cout<<"using RNG "<<ipart<<endl; float xs = TEXsec::Element(iel)->XS(ipart, ireac, en); @@ -95,43 +226,21 @@ int main() int ebinindx=0; TEFstate::Element(iel)->SampleReac(ipart, ireac, en, npart, weight, kerma, enr, pid, mom, ebinindx); if(npart <= 0) continue; + printf(" energy HOST %d\n",enr); fftest << iel << ":" << TPartIndex::I()->PartName(ipart) << ":" << ireac << ":" << en << ":" << xs << ":" << npart << ":" << weight << ":" << kerma << ":" << enr << ":"; for(auto i=0; i<npart; ++i) fftest << pid[i] << ":" << mom[i*3] << ":" << mom[i*3+1] << ":" << mom[i*3+2]; fftest <<":" << ebinindx << std::endl; +*/ + fftest << iPart[irep] << ":" << iEnergy[irep] <<std::endl; } - } + /// } fftest.close(); +/* #ifdef USE_ROOT delete geom; #endif */ - printf("basta!"); return 0; } -/* -void expandPhysicsKernel<<<>>>(char *buf) { - std::cout << "Rebuilding TPartIndex store" << std::endl; - TPartIndex::I()->RebuildClass(buf); - int sizet = TPartIndex::I()->SizeOf(); - std::cout << "Number of bytes for TPartIndex " << sizet << std::endl; - buf += sizet; - std::cout << "Rebuilding x-sec store" << std::endl; - TEXsec::RebuildStore(buf); - int sizex = TEXsec::SizeOfStore(); - std::cout << "Number of bytes for x-sec " << sizex << std::endl; - buf += sizex; - std::cout << "Rebuilding decay store" << std::endl; - TPDecay *dec = (TPDecay *) buf; - dec->RebuildClass(); - TEFstate::SetDecayTable(dec); - int sized = dec->SizeOf(); - std::cout << "Number of bytes for decay " << sized << std::endl; - buf += sized; - std::cout << "Rebuilding final state store" << std::endl; - TEFstate::RebuildStore(buf); - int sizef = TEFstate::SizeOfStore(); - std::cout << "Number of bytes for final state " << sizef << std::endl; -} -*/ diff --git a/xsec/testserialReadCuda.cu b/xsec/testserialReadCuda.cu index 9c21a34f37ad9a5e74c2002defc4dc85cc7b0848..8147c76929ed1c72c97c5dbfe8381a6adc8da73f 100644 --- a/xsec/testserialReadCuda.cu +++ b/xsec/testserialReadCuda.cu @@ -1,6 +1,5 @@ - - #include "backend/cuda/Interface.h" + #include "TPartIndex.h" #include "materials/Particle.h" using vecgeom::Particle; @@ -8,15 +7,17 @@ using vecgeom::Particle; #include "TPDecay.h" #include "TEFstate.h" - +#include "base/RNG.h" +using vecgeom::RNG; +#define UNIFORM() RNG::Instance().uniform() +#define FNPROC 18 /* #include "TTabPhysMgr.h" */ __global__ -void expandPhysics(char *buf) { - printf("Rebuilding TPartIndex store\n"); - Particle::CreateParticles(); +void expandPhysics(char *buf, double *idSampled, int* idPart, float* idEnergy) { + printf("Rebuild TPartIndex class\n"); TPartIndex::I()->RebuildClass(buf); int sizet = TPartIndex::I()->SizeOf(); @@ -34,10 +35,46 @@ void expandPhysics(char *buf) { int sized = dec->SizeOf(); printf("Number of bytes for decay %d\n",sized); buf += sized; - printf("Rebuilding final state store"); + printf("Rebuilding final state store\n"); TEFstate::RebuildStore(buf); int sizef = TEFstate::SizeOfStore(); printf("Number of bytes for final state %d\n",sizef); + int iel = 1; + int nrep=50; +// for(auto iel=0; iel<TEXsec::NLdElems(); ++iel) { + printf("ON DEVICE ============== nrep is %d\n",nrep); + for(int irep=0; irep<nrep; irep++) { + idPart[irep] = 1; + idEnergy[irep] = 1.0; + printf("%f\n",idSampled[irep]); + } + for(int irep=0; irep<nrep; irep++) { + + idPart[irep] = (int) (idSampled[irep] * TPartIndex::I()->NPartReac()); + int ireac = idSampled[irep] * FNPROC; + // int ireac = idSampled[irep] * 18; + float en = idSampled[irep] * (TPartIndex::I()->Emax() - TPartIndex::I()->Emin()) + + TPartIndex::I()->Emin(); + //cout<<"using RNG "<<ipart<<endl; + float xs = TEXsec::Element(iel)->XS(idPart[irep], ireac, en); + if(xs < 0) continue; + int npart=0; + float weight=0; + float kerma=0; + float enr = 0.; + const int *pid=0; + const float *mom=0; + int ebinindx=0; + TEFstate::Element(iel)->GetReac(idPart[irep], ireac, en, 1,npart, weight, kerma, enr, pid, mom); + //TEFstate::Element(iel)->SampleReac(idPart[irep], ireac, en, npart, weight, kerma, enr, pid, mom, ebinindx); + idEnergy[irep]=enr; + if (npart>0) + printf("idPart %d, PDG %d, xs %f,ireac %d, energy %f, npart %d, enr %f, pid %d, mom %f %f %f \n", idPart[irep], TPartIndex::I()->PDG(idPart[irep]), xs,ireac, en, npart, enr, pid[0],mom[0],mom[1],mom[2]); + // printf("dev energy for part %s is %f\n", TPartIndex::I()->PartName(idPart[irep]), idEnergy[irep]); + // if(npart <= 0) continue; + } + // } + return; } namespace vecgeom { namespace cxx { @@ -48,12 +85,13 @@ template void DevicePtr<char>::Construct() const; } // End cxx namespace } -void launchExpandPhysicsOnDevice(vecgeom::cxx::DevicePtr<char> &devBuf, int nBlocks, int nThreads) { +void launchExpandPhysicsOnDevice(vecgeom::cxx::DevicePtr<char> &devBuf, int nBlocks, int nThreads, double *idSampled, int* idPart, float* idEnergy) { +//void launchExpandPhysicsOnDevice(vecgeom::cxx::DevicePtr<char> &devBuf, int nBlocks, int nThreads, int nrep, vecgeom::cxx::DevicePtr<double> idPart, vecgeom::cxx::DevicePtr<double> idEnergy) { int threadsPerBlock = nThreads; int blocksPerGrid = nBlocks; printf("Launching expandPhysics threads: %d, blocks: %d\n",nThreads,nBlocks); - expandPhysics<<< blocksPerGrid, threadsPerBlock >>>(devBuf); + expandPhysics<<< blocksPerGrid, threadsPerBlock >>>(devBuf, idSampled, idPart, idEnergy); cudaDeviceSynchronize(); }