diff --git a/Control/AthenaExamples/AthExCUDA/CMakeLists.txt b/Control/AthenaExamples/AthExCUDA/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..7689e9629c8d7037f6b09d017724c9b9a23d8772
--- /dev/null
+++ b/Control/AthenaExamples/AthExCUDA/CMakeLists.txt
@@ -0,0 +1,25 @@
+# Copyright (C) 2002-2020 CERN for the benefit of the ATLAS collaboration
+
+# This package needs CUDA to be handled as a "first class language" by CMake.
+cmake_minimum_required( VERSION 3.10 )
+
+# Set the name of the package.
+atlas_subdir( AthExCUDA )
+
+# The build of this package needs CUDA. If it's not available, don't try to do
+# anything; just print a STATUS message, and skip this package's build entirely.
+include( CheckLanguage )
+check_language( CUDA )
+if( NOT CMAKE_CUDA_COMPILER )
+   message( STATUS "CUDA not found, AthExCUDA is not built" )
+   return()
+endif()
+enable_language( CUDA )
+
+# Add a component library that has some CUDA code in it. (The src/*.cu file(s) are compiled by the CUDA compiler enabled above.)
+atlas_add_component( AthExCUDA
+   src/*.h src/*.cxx src/*.cu src/components/*.cxx
+   LINK_LIBRARIES AthenaBaseComps GaudiKernel )
+
+# Install extra files from the package.
+atlas_install_joboptions( share/*.py )
diff --git a/Control/AthenaExamples/AthExCUDA/README.md b/Control/AthenaExamples/AthExCUDA/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..a9a4ae0a0bd4be9491bb02b6de8cf21519f72a95
--- /dev/null
+++ b/Control/AthenaExamples/AthExCUDA/README.md
@@ -0,0 +1,16 @@
+# CUDA Example Package
+
+This package is meant to hold code demonstrating how to use CUDA directly
+from Athena.
+
+For the package to "do anything", CUDA needs to be available in the build
+environment. Note that it does not necessarily need to be available in
+the runtime environment, as by default CMake links the package's library
+against the CUDA runtime library statically.
+
+CUDA may be available on lxplus in other locations as well, but one place
+that should be available for now is:
+
+```
+/afs/cern.ch/work/k/krasznaa/public/cuda
+```
diff --git a/Control/AthenaExamples/AthExCUDA/share/LinearTransformExample_jobOptions.py b/Control/AthenaExamples/AthExCUDA/share/LinearTransformExample_jobOptions.py
new file mode 100644
index 0000000000000000000000000000000000000000..6b7ae4cf23d0a931ab6d11cd2f129598a26ae6cb
--- /dev/null
+++ b/Control/AthenaExamples/AthExCUDA/share/LinearTransformExample_jobOptions.py
@@ -0,0 +1,15 @@
+# Copyright (C) 2002-2020 CERN for the benefit of the ATLAS collaboration
+#
+# "Standalone" jobOptions for running AthCUDA::LinearTransformExampleAlg.
+#
+
+# Set up / access the (main) algorithm sequence of the job.
+from AthenaCommon.AlgSequence import AlgSequence
+algSequence = AlgSequence()
+
+# Add the example algorithm to the sequence, with its default configuration.
+from AthExCUDA.AthExCUDAConf import AthCUDA__LinearTransformExampleAlg
+algSequence += AthCUDA__LinearTransformExampleAlg()
+
+# Run for 10 (empty) "events". (theApp is put in scope by the Athena bootstrap.)
+theApp.EvtMax = 10
diff --git a/Control/AthenaExamples/AthExCUDA/src/LinearTransformExampleAlg.cxx b/Control/AthenaExamples/AthExCUDA/src/LinearTransformExampleAlg.cxx
new file mode 100644
index 0000000000000000000000000000000000000000..01868723467ad2d12799d164c86eed598dca3dfb
--- /dev/null
+++ b/Control/AthenaExamples/AthExCUDA/src/LinearTransformExampleAlg.cxx
@@ -0,0 +1,45 @@
+//
+// Copyright (C) 2002-2020 CERN for the benefit of the ATLAS collaboration
+//
+
+// Local include(s).
+#include "LinearTransformExampleAlg.h"
+#include "cudaMultiply.h"
+
+// System include(s).
+#include <cmath>
+#include <vector>
+
+namespace AthCUDA {
+
+   StatusCode
+   LinearTransformExampleAlg::execute( const EventContext& ) const {
+
+      // Set up a dummy array, every element holding the same starting value, which will be multiplied element-wise by some amount below.
+      static const std::size_t ARRAY_SIZE = 10000;
+      std::vector< float > dummyArray;
+      dummyArray.reserve( ARRAY_SIZE );
+      static const float ARRAY_ELEMENT = 3.141592f;
+      for( std::size_t i = 0; i < ARRAY_SIZE; ++i ) {
+         dummyArray.push_back( ARRAY_ELEMENT );
+      }
+
+      // Call the CUDA wrapper function, which synchronously multiplies every
+      // element of this vector in place.
+      static const float MULTIPLIER = 1.23f;
+      cudaMultiply( dummyArray, MULTIPLIER );
+
+      // Check that every element was scaled as expected; the 0.001 tolerance absorbs float rounding. (NOTE(review): cudaMultiply() leaves the vector untouched when no CUDA device is present, so this check fails on GPU-less machines — confirm that is intended.)
+      static const float EXPECTED_RESULT = ARRAY_ELEMENT * MULTIPLIER;
+      for( std::size_t i = 0; i < ARRAY_SIZE; ++i ) {
+         if( std::abs( dummyArray[ i ] - EXPECTED_RESULT ) > 0.001 ) {
+            ATH_MSG_ERROR( "The CUDA transformation failed to run" );
+            return StatusCode::FAILURE;
+         }
+      }
+
+      // Tell the framework that this event was processed successfully.
+      return StatusCode::SUCCESS;
+   }
+
+} // namespace AthCUDA
diff --git a/Control/AthenaExamples/AthExCUDA/src/LinearTransformExampleAlg.h b/Control/AthenaExamples/AthExCUDA/src/LinearTransformExampleAlg.h
new file mode 100644
index 0000000000000000000000000000000000000000..85586375e8fab1bd95f33d7f3ce5d495c7a3c36f
--- /dev/null
+++ b/Control/AthenaExamples/AthExCUDA/src/LinearTransformExampleAlg.h
@@ -0,0 +1,33 @@
+// Dear emacs, this is -*- c++ -*-
+//
+// Copyright (C) 2002-2020 CERN for the benefit of the ATLAS collaboration
+//
+#ifndef ATHEXCUDA_LINEARTRANSFORMEXAMPLEALG_H
+#define ATHEXCUDA_LINEARTRANSFORMEXAMPLEALG_H
+
+// Framework include(s).
+#include "AthenaBaseComps/AthReentrantAlgorithm.h"
+
+namespace AthCUDA {
+
+   /// Example algorithm running a very simple operation using CUDA
+   ///
+   /// This is just to demonstrate how to organise C++ + CUDA code in Athena
+   /// to execute CUDA code "directly".
+   ///
+   /// @author Attila Krasznahorkay <Attila.Krasznahorkay@cern.ch>
+   ///
+   class LinearTransformExampleAlg : public AthReentrantAlgorithm {
+
+   public:
+      /// Inherit the base class's constructor
+      using AthReentrantAlgorithm::AthReentrantAlgorithm;
+
+      /// The function executing this algorithm once per event (NOTE(review): presumably overrides AthReentrantAlgorithm::execute — consider spelling it "virtual ... override" so the compiler checks the signature; TODO confirm against the base class)
+      StatusCode execute( const EventContext& ctx ) const;
+
+   }; // class LinearTransformExampleAlg
+
+} // namespace AthCUDA
+
+#endif // ATHEXCUDA_LINEARTRANSFORMEXAMPLEALG_H
diff --git a/Control/AthenaExamples/AthExCUDA/src/components/AthCUDAExample_entries.cxx b/Control/AthenaExamples/AthExCUDA/src/components/AthCUDAExample_entries.cxx
new file mode 100644
index 0000000000000000000000000000000000000000..472381b9b2dac9975696392b5ae1da811de63fed
--- /dev/null
+++ b/Control/AthenaExamples/AthExCUDA/src/components/AthCUDAExample_entries.cxx
@@ -0,0 +1,9 @@
+//
+// Copyright (C) 2002-2020 CERN for the benefit of the ATLAS collaboration
+//
+
+// Local include(s).
+#include "../LinearTransformExampleAlg.h"
+
+// Declare the package's "components" (here just the one algorithm) to the framework.
+DECLARE_COMPONENT( AthCUDA::LinearTransformExampleAlg )
diff --git a/Control/AthenaExamples/AthExCUDA/src/cudaMultiply.cu b/Control/AthenaExamples/AthExCUDA/src/cudaMultiply.cu
new file mode 100644
index 0000000000000000000000000000000000000000..7437fdc381e08c2cf1c7a93ea08285cca1f6404e
--- /dev/null
+++ b/Control/AthenaExamples/AthExCUDA/src/cudaMultiply.cu
@@ -0,0 +1,75 @@
+//
+// Copyright (C) 2002-2020 CERN for the benefit of the ATLAS collaboration
+//
+
+// Local include(s).
+#include "cudaMultiply.h"
+
+// CUDA include(s).
+#include <cuda.h>
+
+// System include(s).
+#include <iostream>
+
+/// Helper macro for executing CUDA calls: on failure it prints the failed expression and the CUDA error string, then makes the *enclosing* function return — so it is only usable inside functions returning void.
+#define CUDA_CHECK( EXP )                                                  \
+   do {                                                                    \
+      const cudaError_t ce = EXP;                                          \
+      if( ce != cudaSuccess ) {                                            \
+         std::cerr << "Failed to execute: " << #EXP << std::endl;          \
+         std::cerr << "Reason: " << cudaGetErrorString( ce ) << std::endl; \
+         return;                                                           \
+      }                                                                    \
+   } while( false )
+
+namespace AthCUDA {
+
+   /// Very simple kernel: each GPU thread multiplies (at most) one array element, with an out-of-range guard for threads past the end.
+   __global__
+   void cudaMultiplyKernel( int n, float* array, float multiplier ) {
+
+      const int index = blockIdx.x * blockDim.x + threadIdx.x;
+      if( index >= n ) {
+         return;
+      }
+
+      array[ index ] *= multiplier;
+      return;
+   }
+
+   void cudaMultiply( std::vector< float >& array, float multiplier ) {
+
+      // If no CUDA device is available, return right away; note that the host array is then left completely unmodified.
+      int nCudaDevices = 0;
+      CUDA_CHECK( cudaGetDeviceCount( &nCudaDevices ) );
+      if( nCudaDevices == 0 ) {
+         return;
+      }
+
+      // Allocate a matching array on the/a device, and copy the host array's
+      // content into it.
+      float* deviceArray = nullptr;
+      CUDA_CHECK( cudaMalloc( &deviceArray, sizeof( float ) * array.size() ) );
+      CUDA_CHECK( cudaMemcpy( deviceArray, array.data(),
+                              sizeof( float ) * array.size(),
+                              cudaMemcpyHostToDevice ) );
+
+      // Run the kernel, with enough blocks to cover every element (ceil-division). (NOTE(review): array.size() is narrowed to int in the kernel call — fine below ~2^31 elements.)
+      static const int blockSize = 256;
+      const int numBlocks = ( array.size() + blockSize - 1 ) / blockSize;
+      cudaMultiplyKernel<<< numBlocks, blockSize >>>( array.size(),
+                                                      deviceArray,
+                                                      multiplier );
+      CUDA_CHECK( cudaDeviceSynchronize() );
+
+      // Copy the (now modified) array back into the host's memory.
+      CUDA_CHECK( cudaMemcpy( array.data(), deviceArray,
+                              sizeof( float ) * array.size(),
+                              cudaMemcpyDeviceToHost ) );
+
+      // Free the memory on the device. (NOTE(review): if any CUDA_CHECK above returned early after the cudaMalloc, deviceArray is leaked — consider freeing it on those paths too.)
+      CUDA_CHECK( cudaFree( deviceArray ) );
+      return;
+   }
+
+} // namespace AthCUDA
diff --git a/Control/AthenaExamples/AthExCUDA/src/cudaMultiply.h b/Control/AthenaExamples/AthExCUDA/src/cudaMultiply.h
new file mode 100644
index 0000000000000000000000000000000000000000..8502e978810a264656fc7c8d3c42806d09e0608f
--- /dev/null
+++ b/Control/AthenaExamples/AthExCUDA/src/cudaMultiply.h
@@ -0,0 +1,18 @@
+// Dear emacs, this is -*- c++ -*-
+//
+// Copyright (C) 2002-2020 CERN for the benefit of the ATLAS collaboration
+//
+#ifndef ATHEXCUDA_CUDAMULTIPLY_H
+#define ATHEXCUDA_CUDAMULTIPLY_H
+
+// System include(s).
+#include <vector>
+
+namespace AthCUDA {
+
+   /// Function used to multiply every element of a vector, in place, by some amount; when no CUDA device is available, the vector is left unchanged
+   void cudaMultiply( std::vector< float >& array, float multiplier );
+
+} // namespace AthCUDA
+
+#endif // ATHEXCUDA_CUDAMULTIPLY_H