diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4d2d801dd1e45dd0531033e83bbde5866de2fdbe..70ee1f0b15442ed13b12f4d536a98edc71fb3f7f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -462,14 +462,15 @@ add_subdirectory (xsec)
 add_subdirectory (vecprot_v2)
 add_subdirectory (examples)
 if (NOT MIC)
-add_subdirectory (tabxsec)
 add_subdirectory (magneticfield)
+if (NOT CUDA)
+add_subdirectory (tabxsec)
 add_subdirectory (test-small)
 add_subdirectory (test-complex)
-
 add_subdirectory (cmstrack)
 add_subdirectory (Nudy)
 endif()
+endif()
 
 if(USE_VECPHYS)
  add_subdirectory (vectphysproc)
diff --git a/xsec/CMakeLists.txt b/xsec/CMakeLists.txt
index d150819a021dcc46ef0e4902ccdbe3cb5cb7cebf..d499e822bc57b450b9c76fcc5d934c1218f68f02 100644
--- a/xsec/CMakeLists.txt
+++ b/xsec/CMakeLists.txt
@@ -86,14 +86,14 @@ if (CUDA)
 
 
  set(SRC_CPP_RELATIVE
+     xsec/src/TEFstate.cxx
      xsec/src/TPartIndex.cxx
      xsec/src/TPXsec.cxx
      xsec/src/TEXsec.cxx
      xsec/src/TPDecay.cxx
      xsec/src/TFinState.cxx
      xsec/src/TPFstate.cxx
-     xsec/src/TEFstate.cxx
-#     xsec/src/TMXsec.cxx
+ #    xsec/src/TMXsec.cxx
  #    xsec/src/TTabPhysMgr.cxx
  #    xsec/src/TTabPhysProcess.cxx
    )
@@ -121,21 +121,12 @@ if (CUDA)
 
    endforeach()
 
-#ROOT_GENERATE_DICTIONARY(xseccudaDict ${headers} MODULE libxseccuda LINKDEF inc/cudaLinkDef.h )
-
-#set(SRC_CUDA ${SRC_CUDA} xseccudaDict.cxx )
 
  cuda_add_library(xseccuda
      ${SRC_CUDA}
-    OPTIONS ${CUDA_ARCH} 
+    OPTIONS ${CUDA_ARCH} "-DGEANT_NVCC"
 )  
-#target_link_libraries(xseccuda  ${VECGEOM_LIBRARIES} ${VC_LIBRARIES} ${HEPMC_LIBS})
 
-#add_dependencies(xseccuda Xsec)
-#get_property(dirs DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY INCLUDE_DIRECTORIES)
-#foreach(dir ${dirs})
-#message (STATUS "dir='${dirs}'")
-#endforeach()
 add_custom_target(testserial ALL DEPENDS xseccuda)
 message (STATUS "vecgeom defs ${CUDA_ARCH}")
 
@@ -143,12 +134,11 @@ ADD_CUSTOM_COMMAND(
     TARGET testserial
     PRE_BUILD
     COMMAND ${CMAKE_COMMAND} -E echo "Separate Compilation for testserialReadCuda"
-    COMMAND ${CUDA_HOST_COMPILER} -c --std=c++11 ${VECGEOM_DEFINITIONS} ${GEANTV_COMPILE_DEFINITIONS} ${CMAKE_SOURCE_DIR}/xsec/testserialReadCuda.cpp -o testserialReadCuda.cpp.o -I${VECGEOM_INSTALL_DIR}/include -I${CUDA_TOOLKIT_ROOT_DIR}/include 
+   COMMAND ${CUDA_HOST_COMPILER} -c -v --std=c++11 -DUSE_VECGEOM_NAVIGATOR ${VECGEOM_DEFINITIONS} ${CMAKE_SOURCE_DIR}/xsec/testserialReadCuda.cpp -o testserialReadCuda.cpp.o -I${VECGEOM_INSTALL_DIR}/include -I${CUDA_TOOLKIT_ROOT_DIR}/include -I${CMAKE_SOURCE_DIR}/include -I${CMAKE_SOURCE_DIR}/xsec/inc -I${CMAKE_SOURCE_DIR}/base/inc -I${VECGEOM_INSTALL_DIR}/include/
     COMMAND ${CMAKE_COMMAND} -E echo "Separate Compilation for testserialReadCuda nvcc ${GEANTV_COMPILE_DEFINITIONS}"
-    COMMAND ${CUDA_NVCC_EXECUTABLE} ${CUDA_NVCC_FLAGS} ${VECGEOM_DEFINITIONS} -DUSE_VECGEOM_NAVIGATOR -dc -arch=sm_30 ${CMAKE_SOURCE_DIR}/xsec/testserialReadCuda.cu -o testserial.o -I${VECGEOM_INSTALL_DIR}/include -I${CMAKE_SOURCE_DIR}/include -I${CMAKE_SOURCE_DIR}/xsec/inc -I${CMAKE_SOURCE_DIR}/base/inc
+    COMMAND ${CUDA_NVCC_EXECUTABLE} ${CUDA_NVCC_FLAGS} ${VECGEOM_DEFINITIONS} -DUSE_VECGEOM_NAVIGATOR --debug --device-debug -lineinfo -DUSE_VECGEOM_NAVIGATOR -dc -arch=sm_30 ${CMAKE_SOURCE_DIR}/xsec/testserialReadCuda.cu -o testserial.o -I${VECGEOM_INSTALL_DIR}/include -I${CMAKE_SOURCE_DIR}/include -I${CMAKE_SOURCE_DIR}/xsec/inc -I${CMAKE_SOURCE_DIR}/base/inc
     COMMAND ${CMAKE_COMMAND} -E echo "Separate Compilation for testserialReadCuda linking"
-    COMMAND ${CUDA_NVCC_EXECUTABLE} ${CUDA_NVCC_FLAGS} --verbose -arch=sm_30  testserialReadCuda.cpp.o testserial.o -L${CMAKE_LIBRARY_OUTPUT_DIRECTORY} -lxseccuda -L${VECGEOM_INSTALL_DIR}/lib/ -lrawvecgeomcuda -lvecgeom -o runtest
-    
+    COMMAND ${CUDA_NVCC_EXECUTABLE} --verbose -arch=sm_30 -lineinfo  testserialReadCuda.cpp.o testserial.o -L${CMAKE_LIBRARY_OUTPUT_DIRECTORY} -lxseccuda -L${VECGEOM_INSTALL_DIR}/lib/ -lrawvecgeomcuda -lvecgeom ${Vc_LIBRARIES} ${ROOT_LIBRARIES} -o runtest
   )
 
 endif()
diff --git a/xsec/inc/TEFstate.h b/xsec/inc/TEFstate.h
index 5f83e3186c42f012afde2f3bc2dbb0cfd8481f86..defc797f484ee0474f272cd05e8118030ac013a1 100644
--- a/xsec/inc/TEFstate.h
+++ b/xsec/inc/TEFstate.h
@@ -15,10 +15,13 @@
 //                                                                      //
 //////////////////////////////////////////////////////////////////////////
 #include "TPFstate.h"
+#include "Geant/Config.h"
 
 #ifndef GEANT_NVCC
+#ifdef USE_ROOT
 class TFile;
 #endif
+#endif
 
 class TFinState;
 class TPDecay;
diff --git a/xsec/testserialReadCuda.cpp b/xsec/testserialReadCuda.cpp
index 59faa0a46138c7c4a8b33633a6a47eb95b4e5c37..ace3738fc2003b6299060bbf419226e416895fd3 100644
--- a/xsec/testserialReadCuda.cpp
+++ b/xsec/testserialReadCuda.cpp
@@ -1,13 +1,11 @@
-/*
-#include "TEXsec.h"
-#include "TEFstate.h"
-#include "TPDecay.h"
 
+/*
 #include "TTabPhysMgr.h"
 #include "GeantPropagator.h"
 #ifdef USE_ROOT
 #include "TGeoManager.h"
 #endif
+*/
 
 #ifdef USE_VECGEOM_NAVIGATOR
 #include "base/RNG.h"
@@ -19,13 +17,40 @@ using vecgeom::RNG;
 #else
 #define UNIFORM() ((double)rand())/RAND_MAX
 #endif
-*/
 
 #include <iostream>
 #include <fstream>
 #include "backend/cuda/Interface.h"
-void launchExpandPhysicsOnDevice(vecgeom::DevicePtr<char>&, int nBlocks, int nThreads);
-
+//#include "TEXsec.h"
+//#include "TEFstate.h"
+//#include "TPDecay.h"
+//void launchExpandPhysicsOnDevice(vecgeom::DevicePtr<char>&, int nBlocks, int nThreads, int nrep, vecgeom::DevicePtr<double> devIPart, vecgeom::DevicePtr<double> devIEnergy);
+void launchExpandPhysicsOnDevice(vecgeom::DevicePtr<char>&, int nBlocks, int nThreads, double* iSampled,int* devIPart, float* devIEnergy);
+/*
+void expandPhysicsLocal(char *buf) {
+   std::cout << "Rebuilding TPartIndex store" << std::endl;
+   TPartIndex::I()->RebuildClass(buf);
+   int sizet = TPartIndex::I()->SizeOf();
+   std::cout << "Number of bytes for TPartIndex " << sizet << std::endl;
+   buf += sizet;
+   std::cout << "Rebuilding x-sec store" << std::endl;
+   TEXsec::RebuildStore(buf);
+   int sizex = TEXsec::SizeOfStore();
+   std::cout << "Number of bytes for x-sec " << sizex << std::endl;
+   buf += sizex;
+   std::cout << "Rebuilding decay store" << std::endl;
+   TPDecay *dec = (TPDecay *) buf;
+   dec->RebuildClass();
+   TEFstate::SetDecayTable(dec);
+   int sized = dec->SizeOf();
+   std::cout << "Number of bytes for decay " << sized << std::endl;
+   buf += sized;
+   std::cout << "Rebuilding final state store" << std::endl;
+   TEFstate::RebuildStore(buf);
+   int sizef = TEFstate::SizeOfStore();
+   std::cout << "Number of bytes for final state -- HOST --" << sizef << std::endl;
+}
+*/
 int main()
 {
    char *hostBuf=nullptr;
@@ -51,11 +76,114 @@ int main()
 
    printf("Total size of store %d\n", totsize);
 
-   launchExpandPhysicsOnDevice(devBuf, 1, 1);
+   const unsigned int nrep = 50;
+   double *iSampled;
+   float *iEnergy; 
+   int *iPart; 
+   cudaMallocHost(&iSampled, nrep*sizeof(double));
+   if (cudaGetLastError() != cudaSuccess) {
+      printf(" ERROR allocating iSample on Host: %s \n",cudaGetErrorString(cudaGetLastError()));
+      return 0;
+   }
+   cudaMallocHost(&iEnergy, nrep*sizeof(float));
+   if (cudaGetLastError() != cudaSuccess) {
+      printf(" ERROR allocating iEnergy on Host: %s \n",cudaGetErrorString(cudaGetLastError()));
+      return 0;
+   }
+   cudaMallocHost(&iPart, nrep*sizeof(int));
+   if (cudaGetLastError() != cudaSuccess) {
+      printf(" ERROR allocating iPart on Host: %s \n",cudaGetErrorString(cudaGetLastError()));
+      return 0;
+   }
+
+   printf("Number of repetitions %d\n", nrep);
+   
+
+   for(auto irep=0; irep<nrep; irep++) {
+      iSampled[irep] = (double) UNIFORM();
+   }
+/*
+   vecgeom::DevicePtr<double> devIPart;
+   vecgeom::DevicePtr<double> devIEnergy;
+
+   devIPart.Allocate(nrep);
+   if (cudaGetLastError() != cudaSuccess) {
+      printf(" ERROR ALLOC buffer\n");
+      return 0;
+   } 
+
+   devIEnergy.Allocate(nrep);
+   if (cudaGetLastError() != cudaSuccess) {
+      printf(" ERROR ALLOC buffer\n");
+      return 0;
+   }
 
-  /*  
-   expandPhysicsKernel<<<>>>(devBuf);
+ 
+   devIPart.ToDevice(iSampled,nrep);
+ */
+    double  *devISampled;
+    int  *devIPart;
+    float  *devIEnergy;
 
+    cudaMalloc((void**) &devISampled, nrep*sizeof(double)); 
+   if (cudaGetLastError() != cudaSuccess) {
+      printf(" ERROR allocating devIPart to Device: %s \n",cudaGetErrorString(cudaGetLastError()));
+      return 0;
+   }
+    cudaMalloc((void**) &devIPart, nrep*sizeof(int)); 
+   if (cudaGetLastError() != cudaSuccess) {
+      printf(" ERROR allocating devIPart to Device: %s \n",cudaGetErrorString(cudaGetLastError()));
+      return 0;
+   }
+   cudaMalloc((void**) &devIEnergy, nrep*sizeof(float)); 
+   if (cudaGetLastError() != cudaSuccess) {
+      printf(" ERROR allocating devIEnergy to Device: %s\n",cudaGetErrorString(cudaGetLastError()));
+      return 0;
+   }
+ 
+    cudaMemcpy(devISampled,iSampled, nrep*sizeof(double),cudaMemcpyHostToDevice);
+   if (cudaGetLastError() != cudaSuccess) {
+      printf(" ERROR copying iSampled to devIPart on Device:%s \n", cudaGetErrorString(cudaGetLastError()));
+      return 0;
+   }
+
+  launchExpandPhysicsOnDevice(devBuf, 1, 1,devISampled, devIPart,devIEnergy);
+
+
+
+cudaThreadSynchronize();
+    cudaMemcpy(iSampled,devISampled, nrep*sizeof(double),cudaMemcpyDeviceToHost);
+   cudaError_t error=cudaGetLastError(); 
+   if (error != cudaSuccess) {
+      printf(" ERROR copy iSampled from Device: %s\n", cudaGetErrorString(error));
+      return 0;
+   }
+
+    cudaMemcpy(iPart,devIPart, nrep*sizeof(int),cudaMemcpyDeviceToHost);
+    error=cudaGetLastError(); 
+   if (error != cudaSuccess) {
+      printf(" ERROR copy iPart from Device: %s\n", cudaGetErrorString(error));
+      return 0;
+   }
+    cudaMemcpy(iEnergy,devIEnergy, nrep*sizeof(float),cudaMemcpyDeviceToHost);
+   if (cudaGetLastError() != cudaSuccess) {
+      printf(" ERROR copy iEnergy from Device\n");
+      return 0;
+   }
+    
+
+   
+/*
+   CudaCopyFromDevice(iEnergy,devIEnergy, devIEnergy.SizeOf());
+   CudaCopyFromDevice(iPart,devIPart, devIPart.SizeOf());
+*/
+
+   printf("============ BACK TO THE HOST ==============\n");
+   for(auto irep=0; irep<nrep; irep++) 
+      printf(" energy for iPart %d is %f\n",iPart[irep], iEnergy[irep]);
+
+/*
+   expandPhysicsLocal(hostBuf);
    const char *fxsec = "/dev/null";
    const char *ffins = "/dev/null";
    #ifdef USE_ROOT
@@ -74,14 +202,17 @@ int main()
    srand(12345);
    #endif
    #endif
+*/
 
    std::ofstream fftest("xphysR.txt");
-   for(auto iel=0; iel<TEXsec::NLdElems(); ++iel) {
-      for(auto irep=0; irep<nrep; ++irep) {
 
-	 int ipart = UNIFORM() * TPartIndex::I()->NPartReac();
-         int ireac = UNIFORM() * FNPROC;
-	 float en =  UNIFORM() * (TPartIndex::I()->Emax() - TPartIndex::I()->Emin())
+//   for(auto iel=0; iel<TEXsec::NLdElems(); ++iel) {
+   int iel =1;
+      for(auto irep=0; irep<nrep; ++irep) {
+/*
+	 int ipart = iPart[irep] * TPartIndex::I()->NPartReac();
+         int ireac = iPart[irep] * FNPROC;
+	 float en =  iPart[irep] * (TPartIndex::I()->Emax() - TPartIndex::I()->Emin())
 	    + TPartIndex::I()->Emin();
           //cout<<"using RNG "<<ipart<<endl;
          float xs = TEXsec::Element(iel)->XS(ipart, ireac, en);
@@ -95,43 +226,21 @@ int main()
 	 int ebinindx=0;
 	 TEFstate::Element(iel)->SampleReac(ipart, ireac, en, npart, weight, kerma, enr, pid, mom, ebinindx);
 	 if(npart <= 0) continue;
+         printf(" energy HOST %d\n",enr);
 	 fftest <<  iel << ":" << TPartIndex::I()->PartName(ipart) << ":" << ireac << ":" << en
 		<< ":" << xs << ":" << npart << ":" << weight << ":" << kerma << ":" << enr << ":";
 	 for(auto i=0; i<npart; ++i)
 	    fftest << pid[i] << ":" << mom[i*3] << ":" << mom[i*3+1] << ":" << mom[i*3+2];
 	 fftest <<":" << ebinindx << std::endl;
+*/
+	 fftest <<  iPart[irep] << ":" << iEnergy[irep] <<std::endl;
       }
-   }
+  /// }
    fftest.close();
+/*
    #ifdef USE_ROOT
    delete geom;
    #endif
 */
-   printf("basta!");
    return 0;
 }
-/*
-void expandPhysicsKernel<<<>>>(char *buf) {
-   std::cout << "Rebuilding TPartIndex store" << std::endl;
-   TPartIndex::I()->RebuildClass(buf);
-   int sizet = TPartIndex::I()->SizeOf();
-   std::cout << "Number of bytes for TPartIndex " << sizet << std::endl;
-   buf += sizet;
-   std::cout << "Rebuilding x-sec store" << std::endl;
-   TEXsec::RebuildStore(buf);
-   int sizex = TEXsec::SizeOfStore();
-   std::cout << "Number of bytes for x-sec " << sizex << std::endl;
-   buf += sizex;
-   std::cout << "Rebuilding decay store" << std::endl;
-   TPDecay *dec = (TPDecay *) buf;
-   dec->RebuildClass();
-   TEFstate::SetDecayTable(dec);
-   int sized = dec->SizeOf();
-   std::cout << "Number of bytes for decay " << sized << std::endl;
-   buf += sized;
-   std::cout << "Rebuilding final state store" << std::endl;
-   TEFstate::RebuildStore(buf);
-   int sizef = TEFstate::SizeOfStore();
-   std::cout << "Number of bytes for final state " << sizef << std::endl;
-}
-*/
diff --git a/xsec/testserialReadCuda.cu b/xsec/testserialReadCuda.cu
index 9c21a34f37ad9a5e74c2002defc4dc85cc7b0848..8147c76929ed1c72c97c5dbfe8381a6adc8da73f 100644
--- a/xsec/testserialReadCuda.cu
+++ b/xsec/testserialReadCuda.cu
@@ -1,6 +1,5 @@
-
-
 #include "backend/cuda/Interface.h"
+
 #include "TPartIndex.h"
 #include "materials/Particle.h"
 using vecgeom::Particle; 
@@ -8,15 +7,17 @@ using vecgeom::Particle;
 #include "TPDecay.h"
 
 #include "TEFstate.h"
-
+#include "base/RNG.h"
+using vecgeom::RNG;
+#define UNIFORM() RNG::Instance().uniform()
+#define FNPROC 18
 /*
 #include "TTabPhysMgr.h"
 */
 
 __global__
-void expandPhysics(char *buf) {
-   printf("Rebuilding TPartIndex store\n");
-   Particle::CreateParticles();
+void expandPhysics(char *buf, double *idSampled, int* idPart, float* idEnergy) {
+   printf("Rebuild TPartIndex class\n");
    TPartIndex::I()->RebuildClass(buf);
 
    int sizet = TPartIndex::I()->SizeOf();
@@ -34,10 +35,46 @@ void expandPhysics(char *buf) {
    int sized = dec->SizeOf();
    printf("Number of bytes for decay %d\n",sized);
    buf += sized;
-   printf("Rebuilding final state store");
+   printf("Rebuilding final state store\n");
    TEFstate::RebuildStore(buf);
    int sizef = TEFstate::SizeOfStore();
    printf("Number of bytes for final state %d\n",sizef);
+   int iel = 1;
+   int nrep=50;
+//   for(auto iel=0; iel<TEXsec::NLdElems(); ++iel) {
+      printf("ON DEVICE ============== nrep is %d\n",nrep);
+      for(int irep=0; irep<nrep; irep++) {
+         idPart[irep] = 1;
+         idEnergy[irep] = 1.0;
+         printf("%f\n",idSampled[irep]);
+      }
+      for(int irep=0; irep<nrep; irep++) {
+
+	 idPart[irep] = (int) (idSampled[irep] * TPartIndex::I()->NPartReac());
+         int ireac = idSampled[irep] * FNPROC;
+        // int ireac = idSampled[irep] * 18;
+	 float en =  idSampled[irep] * (TPartIndex::I()->Emax() - TPartIndex::I()->Emin())
+	    + TPartIndex::I()->Emin();
+          //cout<<"using RNG "<<ipart<<endl;
+         float xs = TEXsec::Element(iel)->XS(idPart[irep], ireac, en);
+ 	 if(xs < 0) continue;
+	 int npart=0;
+	 float weight=0;
+	 float kerma=0;
+         float enr = 0.;
+	 const int *pid=0;
+	 const float *mom=0;
+	 int ebinindx=0;
+         TEFstate::Element(iel)->GetReac(idPart[irep], ireac, en, 1,npart, weight, kerma, enr, pid, mom);
+	 //TEFstate::Element(iel)->SampleReac(idPart[irep], ireac, en, npart, weight, kerma, enr, pid, mom, ebinindx);
+         idEnergy[irep]=enr;
+         if (npart>0)
+         printf("idPart %d, PDG %d, xs %f,ireac %d,  energy %f, npart %d, enr %f, pid %d, mom %f %f %f \n", idPart[irep], TPartIndex::I()->PDG(idPart[irep]), xs,ireac, en, npart, enr, pid[0],mom[0],mom[1],mom[2]);
+       //  printf("dev energy for part %s is %f\n", TPartIndex::I()->PartName(idPart[irep]), idEnergy[irep]);
+	// if(npart <= 0) continue;
+    }
+ // }
+  return;
 }
 namespace vecgeom {
 namespace cxx {
@@ -48,12 +85,13 @@ template void DevicePtr<char>::Construct() const;
 } // End cxx namespace
 }
 
-void launchExpandPhysicsOnDevice(vecgeom::cxx::DevicePtr<char> &devBuf, int nBlocks, int nThreads) {
+void launchExpandPhysicsOnDevice(vecgeom::cxx::DevicePtr<char> &devBuf, int nBlocks, int nThreads, double *idSampled, int* idPart, float* idEnergy) {
+//void launchExpandPhysicsOnDevice(vecgeom::cxx::DevicePtr<char> &devBuf, int nBlocks, int nThreads, int nrep, vecgeom::cxx::DevicePtr<double> idPart, vecgeom::cxx::DevicePtr<double> idEnergy) {
  int threadsPerBlock = nThreads;
  int blocksPerGrid   = nBlocks;
    printf("Launching expandPhysics threads: %d, blocks: %d\n",nThreads,nBlocks);
 
-  expandPhysics<<< blocksPerGrid, threadsPerBlock >>>(devBuf);
+  expandPhysics<<< blocksPerGrid, threadsPerBlock >>>(devBuf, idSampled, idPart, idEnergy);
    cudaDeviceSynchronize();
 }