diff --git a/Control/AthCUDA/AthCUDACore/src/Memory.cu b/Control/AthCUDA/AthCUDACore/src/Memory.cu index 94305f8b3377e7de169d7c98c6957cb3d2b2bbd2..476008d37593b692a94a5d0ce067ea7d4355c341 100644 --- a/Control/AthCUDA/AthCUDACore/src/Memory.cu +++ b/Control/AthCUDA/AthCUDACore/src/Memory.cu @@ -76,11 +76,7 @@ namespace AthCUDA { // If a device is available, then free up the memory using CUDA. if( Info::instance().nDevices() != 0 ) { - taskArena().enqueue( ::DeviceDeleterTask( ptr ) -#if __TBB_TASK_PRIORITY - , tbb::priority_normal -#endif // __TBB_TASK_PRIORITY - ); + taskArena().enqueue( ::DeviceDeleterTask( ptr ) ); return; } @@ -98,11 +94,7 @@ namespace AthCUDA { // If a device is available, then free up the memory using CUDA. if( Info::instance().nDevices() != 0 ) { - taskArena().enqueue( ::DeviceDeleterTask( ptr ) -#if __TBB_TASK_PRIORITY - , tbb::priority_normal -#endif // __TBB_TASK_PRIORITY - ); + taskArena().enqueue( ::DeviceDeleterTask( ptr ) ); return; } @@ -120,11 +112,7 @@ namespace AthCUDA { // If a device is available, then free up the memory using CUDA. if( Info::instance().nDevices() != 0 ) { - taskArena().enqueue( ::HostDeleterTask( ptr ) -#if __TBB_TASK_PRIORITY - , tbb::priority_normal -#endif // __TBB_TASK_PRIORITY - ); + taskArena().enqueue( ::HostDeleterTask( ptr ) ); return; } diff --git a/Control/AthCUDA/AthCUDAKernel/AthCUDAKernel/ArrayKernelTaskImpl.cuh b/Control/AthCUDA/AthCUDAKernel/AthCUDAKernel/ArrayKernelTaskImpl.cuh index 5ff7c50d3aec6ca625579f7b76db61083b0e8e79..234c83fd70a28bcb43ae57c41371801d0a3fe587 100644 --- a/Control/AthCUDA/AthCUDAKernel/AthCUDAKernel/ArrayKernelTaskImpl.cuh +++ b/Control/AthCUDA/AthCUDAKernel/AthCUDAKernel/ArrayKernelTaskImpl.cuh @@ -20,11 +20,11 @@ /// Helper macro for status code checks inside of these functions #define AKT_CHECK( EXP ) \ do { \ - const int _result = EXP; \ - if( _result != 0 ) { \ + const int exp_result = EXP; \ + if( exp_result != 0 ) { \ std::cerr << __FILE__ << ":" << __LINE__ \ << " Failed to execute: " << #EXP << std::endl; \ - return _result; \ + return exp_result; \ } \ } while( false ) @@ -385,11 +385,23 @@ namespace { "Only trivial arrays are supported" ); public: /// Operator scheduling the host->device copy of one array - int operator()( cudaStream_t stream, std::size_t arraySizes, - typename ArrayKernelTaskHostVariables< ARGS... >::type& - hostArgs, - typename ArrayKernelTaskDeviceVariables< ARGS... >::type& - deviceArgs ) { + int operator()( cudaStream_t +#ifdef __CUDACC__ + stream +#endif // __CUDACC__ + , std::size_t +#ifdef __CUDACC__ + arraySizes +#endif // __CUDACC__ + , typename ArrayKernelTaskHostVariables< ARGS... >::type& +#ifdef __CUDACC__ + hostArgs +#endif // __CUDACC__ + , typename ArrayKernelTaskDeviceVariables< ARGS... >::type& +#ifdef __CUDACC__ + deviceArgs +#endif // __CUDACC__ + ) { // Schedule the H->D copy. CUDA_EXP_CHECK( cudaMemcpyAsync( std::get< Index >( deviceArgs ).get(), std::get< Index >( hostArgs ).get(), @@ -502,11 +514,23 @@ namespace { "Only trivial arrays are supported" ); public: /// Operator scheduling the device->host copy of one array - int operator()( cudaStream_t stream, std::size_t arraySizes, - typename ArrayKernelTaskDeviceVariables< ARGS... >::type& - deviceObjs, - typename ArrayKernelTaskHostVariables< ARGS... >::type& - hostObjs ) { + int operator()( cudaStream_t +#ifdef __CUDACC__ + stream +#endif // __CUDACC__ + , std::size_t +#ifdef __CUDACC__ + arraySizes +#endif // __CUDACC__ + , typename ArrayKernelTaskDeviceVariables< ARGS... >::type& +#ifdef __CUDACC__ + deviceObjs +#endif // __CUDACC__ + , typename ArrayKernelTaskHostVariables< ARGS... >::type& +#ifdef __CUDACC__ + hostObjs +#endif // __CUDACC__ + ) { // Schedule the D->H copy. CUDA_EXP_CHECK( cudaMemcpyAsync( std::get< Index >( hostObjs ).get(), std::get< Index >( deviceObjs ).get(), @@ -801,8 +825,16 @@ namespace { /// Function called at the end of the recursive function calls. This /// is the function that actually does something. template< typename... ARGS1 > - static int execute( cudaStream_t stream, std::size_t arraySizes, - const std::tuple<>&, ARGS1... args ) { + static int execute( cudaStream_t +#ifdef __CUDACC__ + stream +#endif // __CUDACC__ + , std::size_t arraySizes, const std::tuple<>&, + ARGS1... +#ifdef __CUDACC__ + args +#endif // __CUDACC__ + ) { // If the arrays are empty, return right away. if( arraySizes == 0 ) { diff --git a/Control/AthCUDA/AthCUDAServices/src/KernelRunnerSvcImpl.cu b/Control/AthCUDA/AthCUDAServices/src/KernelRunnerSvcImpl.cu index d8e6335ead307f273985ed6c6b5add2fe2253e3f..909feb7db78248c180e421229fbc5da8d937eeff 100644 --- a/Control/AthCUDA/AthCUDAServices/src/KernelRunnerSvcImpl.cu +++ b/Control/AthCUDA/AthCUDAServices/src/KernelRunnerSvcImpl.cu @@ -114,11 +114,7 @@ namespace AthCUDA { // kernel. taskArena().enqueue( ::KernelSchedulerTask( m_callback, std::move( task ), - *this ) -#if __TBB_TASK_PRIORITY - , tbb::priority_normal -#endif // __TBB_TASK_PRIORITY - ); + *this ) ); // Return gracefully. return;