diff --git a/Trigger/TrigAccel/TrigAccelEvent/CMakeLists.txt b/Trigger/TrigAccel/TrigAccelEvent/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..53821ba33acb37514c9e9ce1de9716f2df0b3bd5 --- /dev/null +++ b/Trigger/TrigAccel/TrigAccelEvent/CMakeLists.txt @@ -0,0 +1,14 @@ +################################################################################ +# Package: TrigAccelEvent +################################################################################ + +# Copyright (C) 2002-2020 CERN for the benefit of the ATLAS collaboration + +# Declare the package name: +atlas_subdir( TrigAccelEvent ) + +# Component(s) in the package: +atlas_add_library( TrigAccelEvent + TrigAccelEvent/*.h + INTERFACE + PUBLIC_HEADERS TrigAccelEvent) diff --git a/Trigger/TrigAccel/TrigAccelEvent/TrigAccelEvent/DataExportBuffer.h b/Trigger/TrigAccel/TrigAccelEvent/TrigAccelEvent/DataExportBuffer.h new file mode 100644 index 0000000000000000000000000000000000000000..97d7d7640de4557ee0a10837671087d3f78409e8 --- /dev/null +++ b/Trigger/TrigAccel/TrigAccelEvent/TrigAccelEvent/DataExportBuffer.h @@ -0,0 +1,57 @@ +/* + Copyright (C) 2002-2020 CERN for the benefit of the ATLAS collaboration +*/ + +#ifndef TRIGACCELEVENT_DATAEXPORTBUFFER_H +#define TRIGACCELEVENT_DATAEXPORTBUFFER_H + + +#include <string> +#include <fstream> + +namespace TrigAccel { + + typedef struct DataExportBuffer { + public: + DataExportBuffer() : m_size(0), m_buffer(0) {}; + DataExportBuffer(size_t s) : m_size(s) { + m_buffer = new char[s]; + } + ~DataExportBuffer() {delete[] m_buffer;} + + inline bool fit(size_t s) { + return s<=m_size; + } + + void reallocate(size_t s) { + delete[] m_buffer; + m_buffer = new char[s]; + m_size = s; + } + + void save(const std::string& name) const { + std::ofstream binFile(name, std::ios::binary); + binFile.write(m_buffer, m_size); + binFile.close(); + } + + size_t load(const std::string& name) { + std::ifstream binFile(name, std::ios::binary); + if (!binFile) { + return 0; + } + binFile.seekg(0, binFile.end); + size_t fileSize = binFile.tellg(); + binFile.seekg (0, binFile.beg); + reallocate(fileSize); + binFile.read(m_buffer, m_size); + binFile.close(); + return fileSize; + } + + size_t m_size; + char* m_buffer; + } DATA_EXPORT_BUFFER; +} + +#endif diff --git a/Trigger/TrigAccel/TrigAccelEvent/TrigAccelEvent/OffloadBuffer.h b/Trigger/TrigAccel/TrigAccelEvent/TrigAccelEvent/OffloadBuffer.h new file mode 100644 index 0000000000000000000000000000000000000000..9b8e793ad0755efa1176e2351054fbdf6faf58d5 --- /dev/null +++ b/Trigger/TrigAccel/TrigAccelEvent/TrigAccelEvent/OffloadBuffer.h @@ -0,0 +1,39 @@ +/* + Copyright (C) 2002-2020 CERN for the benefit of the ATLAS collaboration +*/ + +#ifndef TRIGACCELEVENT_OFFLOADBUFFER_H +#define TRIGACCELEVENT_OFFLOADBUFFER_H + +#include "DataExportBuffer.h" +#include <cstring> + +namespace TrigAccel { + + class OffloadBuffer { + public: + OffloadBuffer(size_t size) : m_bufferSize(size) { + m_rawBuffer = new unsigned char[m_bufferSize]; + } + + OffloadBuffer(const DATA_EXPORT_BUFFER* pB) { + const size_t bufferOffset = 256; + m_bufferSize = pB->m_size - bufferOffset; + m_rawBuffer = new unsigned char[m_bufferSize]; + memcpy(m_rawBuffer, pB->m_buffer + bufferOffset, m_bufferSize); + } + + virtual ~OffloadBuffer() { + delete[] m_rawBuffer; + } + + unsigned char* get() { + return m_rawBuffer; + } + + size_t m_bufferSize; + unsigned char* m_rawBuffer; + }; +} + +#endif diff --git a/Trigger/TrigAccel/TrigAccelEvent/TrigAccelEvent/TrigInDetAccelCodes.h b/Trigger/TrigAccel/TrigAccelEvent/TrigAccelEvent/TrigInDetAccelCodes.h new file mode 100644 index 0000000000000000000000000000000000000000..44a450b477eb55b77a1ccbfe30df4b35a493abc0 --- /dev/null +++ b/Trigger/TrigAccel/TrigAccelEvent/TrigAccelEvent/TrigInDetAccelCodes.h @@ -0,0 +1,33 @@ +/* + Copyright (C) 2002-2020 CERN for the benefit of the ATLAS collaboration +*/ + +#ifndef TRIGACCELEVENT_TRIGINDETACCELCODES_H +#define TRIGACCELEVENT_TRIGINDETACCELCODES_H + +namespace TrigAccel { + + constexpr unsigned int TrigInDetModuleID_CPU = 0x10000000; + + constexpr unsigned int TrigInDetModuleID_CUDA = 0x10100000; + + enum InDetJobControlCode { + PIXBS = 11001, + SCTBS = 12001, + PIX_CL_EXPORT = 11010, + SCT_CL_EXPORT = 12010, + SPS_EXPORT = 13010, + SFS_EXPORT = 13011, + TFS_EXPORT = 13012, + SEED_IMPORT = 13100, + SIL_LAYERS_EXPORT = 10020, + PIX_GEO_EXPORT = 11020, + SCT_GEO_EXPORT = 12020, + ID_CABLING_EXPORT = 10000, + FIND_SEEDS = 14000, + MAKE_SEEDS = 14001, + FIND_TRACKS = 15000 + }; +} + +#endif diff --git a/Trigger/TrigAccel/TrigAccelEvent/TrigAccelEvent/TrigInDetAccelEDM.h b/Trigger/TrigAccel/TrigAccelEvent/TrigAccelEvent/TrigInDetAccelEDM.h new file mode 100644 index 0000000000000000000000000000000000000000..4d7b14228d2cab53f96471c4023caeb269d02202 --- /dev/null +++ b/Trigger/TrigAccel/TrigAccelEvent/TrigAccelEvent/TrigInDetAccelEDM.h @@ -0,0 +1,403 @@ +/* + Copyright (C) 2002-2020 CERN for the benefit of the ATLAS collaboration +*/ + +#ifndef TRIGACCELEVENT_TRIGINDETACCELEDM_H +#define TRIGACCELEVENT_TRIGINDETACCELEDM_H + +#include<cstdint> + +namespace TrigAccel { + + //A. GPU-accelerated track seeding + + constexpr unsigned int MAX_SILICON_LAYERS = 50; + constexpr unsigned int MAX_NUMBER_PIX_MODULES = 2100; + constexpr unsigned int MAX_NUMBER_SCT_MODULES = 8200; + constexpr unsigned int MAX_NUMBER_SPACEPOINTS = 100000; + constexpr unsigned int MAX_PHI_SLICES = 100; + constexpr unsigned int MAX_NUMBER_OUTPUT_SEEDS = 100000; + + typedef struct SiliconLayer { + public: + int m_subdet;//1 : Pixel, 2 : SCT + int m_type;//0: barrel, +/-n : endcap + float m_refCoord; + int m_nElements; + float m_minBound, m_maxBound; + float m_phiBinWidth, m_rzBinWidth; + int m_nPhiSlices; + + } SILICON_LAYER; + + typedef struct DetectorModel { + public: + int m_nLayers; + int m_nModules; + SILICON_LAYER m_layers[MAX_SILICON_LAYERS]; + int m_hashArray[MAX_NUMBER_PIX_MODULES+MAX_NUMBER_SCT_MODULES]; + float m_minRZ[MAX_NUMBER_PIX_MODULES+MAX_NUMBER_SCT_MODULES]; + float m_maxRZ[MAX_NUMBER_PIX_MODULES+MAX_NUMBER_SCT_MODULES]; + } DETECTOR_MODEL; + + typedef struct SpacePointLayerRange { + public: + int m_layerBegin[MAX_SILICON_LAYERS]; + int m_layerEnd[MAX_SILICON_LAYERS]; + } SPACEPOINT_LAYER_RANGE; + + typedef struct SpacePointStorage { + public: + int m_nSpacepoints; + int m_nPhiSlices; + int m_nLayers; + int m_index[MAX_NUMBER_SPACEPOINTS]; + int m_type[MAX_NUMBER_SPACEPOINTS]; + float m_x[MAX_NUMBER_SPACEPOINTS]; + float m_y[MAX_NUMBER_SPACEPOINTS]; + float m_z[MAX_NUMBER_SPACEPOINTS]; + float m_r[MAX_NUMBER_SPACEPOINTS]; + float m_phi[MAX_NUMBER_SPACEPOINTS]; + float m_covR[MAX_NUMBER_SPACEPOINTS]; + float m_covZ[MAX_NUMBER_SPACEPOINTS]; + SPACEPOINT_LAYER_RANGE m_phiSlices[MAX_PHI_SLICES]; + } SPACEPOINT_STORAGE; + + typedef struct SeedFinderSettings { + public: + unsigned int m_maxBarrelPix, m_minEndcapPix, m_maxEndcapPix, m_maxSiliconLayer; + float m_magFieldZ; + float m_tripletD0Max; + float m_tripletD0_PPS_Max; + float m_tripletPtMin; + int m_tripletDoPSS, m_doubletFilterRZ; + int m_nMaxPhiSlice; + unsigned int m_maxTripletBufferLength; + int m_isFullScan; + float m_zedMinus, m_zedPlus; + + } SEED_FINDER_SETTINGS; + + typedef struct SeedMakingJob { + public: + SEED_FINDER_SETTINGS m_settings; + SPACEPOINT_STORAGE m_data; + } SEED_MAKING_JOB; + + typedef struct OutputSeedStorage { + public: + int m_nSeeds; + int m_nMiddleSps; + int m_nI, m_nO; + int m_nCandidates; + int m_nErrors; + int m_innerIndex[MAX_NUMBER_OUTPUT_SEEDS]; + int m_middleIndex[MAX_NUMBER_OUTPUT_SEEDS]; + int m_outerIndex[MAX_NUMBER_OUTPUT_SEEDS]; + float m_Q[MAX_NUMBER_OUTPUT_SEEDS]; + } OUTPUT_SEED_STORAGE; + + //B. GPU-accelerated track following + + constexpr unsigned int MAX_NUMBER_PIX_HITS = 100000; + constexpr unsigned int MAX_NUMBER_SCT_HITS = 100000; + constexpr unsigned int MAX_NUMBER_INPUT_SEEDS = 50000; + constexpr unsigned int MAX_ROAD_LENGTH = 64; + + typedef struct TrackFinderSettings { + public: + unsigned int m_maxBarrelPix, m_minEndcapPix, m_maxEndcapPix, m_maxSiliconLayer; + float m_magFieldZ; + float m_tripletD0Max; + float m_tripletD0_PPS_Max; + float m_tripletPtMin; + int m_tripletDoPSS, m_doubletFilterRZ; + int m_nMaxPhiSlice; + unsigned int m_maxTripletBufferLength; + int m_isFullScan; + } TRACK_FINDER_SETTINGS; + + typedef struct SiliconPlane{ + public: + double m_Ax[3]; + double m_Ay[3]; + double m_Az[3]; + double m_D[3]; + double m_B[3];//magnetic field in the center + float m_minWidth; + float m_maxWidth; + float m_length; + int m_shape; + } SILICON_PLANE; + + typedef struct SctGeometryStorage { + public: + int m_nModules; + int m_dead[MAX_NUMBER_SCT_MODULES]; + int m_type[MAX_NUMBER_SCT_MODULES]; + SILICON_PLANE m_geoInfo[MAX_NUMBER_SCT_MODULES]; + } SCT_GEO_STORAGE; + + typedef struct PixelGeometryStorage { + public: + int m_nModules; + int m_dead[MAX_NUMBER_PIX_MODULES]; + int m_type[MAX_NUMBER_PIX_MODULES]; + SILICON_PLANE m_geoInfo[MAX_NUMBER_PIX_MODULES]; + } PIXEL_GEO_STORAGE; + + typedef struct PixelClusterStorage { + public: + int m_nModules; + int m_start[MAX_NUMBER_PIX_MODULES]; + int m_end[MAX_NUMBER_PIX_MODULES]; + float m_localX[MAX_NUMBER_PIX_HITS]; + float m_localY[MAX_NUMBER_PIX_HITS]; + float m_covXX[MAX_NUMBER_PIX_HITS]; + float m_covXY[MAX_NUMBER_PIX_HITS]; + float m_covYY[MAX_NUMBER_PIX_HITS]; + } PIXEL_CLUSTER_STORAGE; + + typedef struct SctClusterStorage { + public: + int m_nModules; + int m_start[MAX_NUMBER_SCT_MODULES]; + int m_end[MAX_NUMBER_SCT_MODULES]; + float m_localX[MAX_NUMBER_SCT_HITS]; + float m_covXX[MAX_NUMBER_SCT_HITS]; + } SCT_CLUSTER_STORAGE; + + typedef struct ProtoTrack { + int m_nElements; + int m_nSeedElements; + float m_initialParams[5]; + int m_planeType[MAX_ROAD_LENGTH]; + int m_planeIndex[MAX_ROAD_LENGTH]; + int m_hitIndex[MAX_ROAD_LENGTH]; + int m_seedPlanes[6];//max = 3 SP x 2 clusters + } PROTO_TRACK; + + typedef struct ProtoTrackStorage { + public: + int m_nSeeds; + PROTO_TRACK m_tracks[MAX_NUMBER_INPUT_SEEDS]; + } PROTO_TRACK_STORAGE; + + + typedef struct TrackSeedStorage { + public: + int m_nSeeds; + int m_planeType[MAX_NUMBER_INPUT_SEEDS]; + int m_planeIdx[MAX_NUMBER_INPUT_SEEDS]; + int m_sp1stPlaneIndices[MAX_NUMBER_INPUT_SEEDS][3]; + int m_sp2ndPlaneIndices[MAX_NUMBER_INPUT_SEEDS][3]; + int m_spClusterIndices[MAX_NUMBER_INPUT_SEEDS][6]; + float m_sp1x[MAX_NUMBER_INPUT_SEEDS]; + float m_sp1y[MAX_NUMBER_INPUT_SEEDS]; + float m_sp1z[MAX_NUMBER_INPUT_SEEDS]; + float m_sp2x[MAX_NUMBER_INPUT_SEEDS]; + float m_sp2y[MAX_NUMBER_INPUT_SEEDS]; + float m_sp2z[MAX_NUMBER_INPUT_SEEDS]; + float m_sp3x[MAX_NUMBER_INPUT_SEEDS]; + float m_sp3y[MAX_NUMBER_INPUT_SEEDS]; + float m_sp3z[MAX_NUMBER_INPUT_SEEDS]; + + } TRACK_SEED_STORAGE; + + typedef struct ExtendedTrackStateStruct2 { + float m_par[10]; + float m_cov[55]; + } EXTENDED_TRACK_STATE_TYPE_2; + + typedef struct LocalEstimate { + public: + float m_P[5]; + float m_J[5][5];//jacobian to get to the next plane estimate + float m_path; + } LOCAL_ESTIMATE; + + typedef struct TrackData { + public: + int m_status; + float m_chi2; + int m_ndof; + int m_firstElement; + int m_lastElement; + char m_stat[MAX_ROAD_LENGTH]; + char m_validatedPlanes[MAX_ROAD_LENGTH]; + + LOCAL_ESTIMATE m_E[MAX_ROAD_LENGTH]; + + EXTENDED_TRACK_STATE_TYPE_2 m_ets; + int m_nValidated; + } TRACK_DATA; + + typedef struct SeededOutput { + public: + TRACK_DATA m_data[MAX_NUMBER_INPUT_SEEDS]; + } SEEDED_OUTPUT; + + //C. GPU-accelerated data preparation + + constexpr unsigned int MAX_NUMBER_BS_WORDS = 80000; + constexpr unsigned int MAX_GANGED_PIXELS = 4000; + constexpr unsigned int MAX_GANGED_PIXEL_PER_MODULE = 50; + constexpr unsigned int MAX_PIX_BS_HEADERS = 2048; + constexpr unsigned int MAX_PIX_HASH = 2048; + constexpr unsigned int MAX_SCT_BS_HEADERS = 8500; + constexpr unsigned int MAX_SCT_HASH = 8176; + constexpr unsigned int MAX_BS_ROBF = 200; + constexpr unsigned int MAX_ADJ_HIT_PAIRS = 1024; + constexpr unsigned int SCT_MAX_SP_PER_MODULE = 256; + constexpr unsigned int MAX_PHI_INDEX = 100; + constexpr unsigned int MAX_RZ_INDEX = 300; + constexpr unsigned int MAX_PIX_ROD_INDEX = 200; + constexpr unsigned int MAX_PIX_LINK_INDEX = 8; + constexpr unsigned int MAX_SCT_ROD_INDEX = 90; + constexpr unsigned int MAX_SCT_LINK_INDEX = 96; + + typedef struct SctModuleGeoInfo { + char m_type; + float m_phiPitch; + float m_lorentzShift; + float m_center[3]; + float m_M[3][3]; + //special meaning for endcap + float m_stripLength; + float m_maxRadius; + float m_minRadius; + + } SCT_MODULE_GEO_INFO; + + typedef struct PixelModuleGeoInfo { + char m_type; + char m_bLayer; + float m_phiPitch; + float m_etaPitchLong; + float m_etaPitchNormal; + float m_lorentzShift; + float m_center[3]; + float m_M[3][3]; + float m_halfWidth; + float m_halfLength; + } PIXEL_MODULE_GEO_INFO; + + struct HashQuadruplet { + unsigned short x,y,z,w; + }; + + typedef struct IdCablingInfo { + public: + HashQuadruplet m_pixelRodLinkHashTable[MAX_PIX_ROD_INDEX][MAX_PIX_LINK_INDEX]; + unsigned short m_sctRodLinkHashTable[MAX_SCT_ROD_INDEX][MAX_SCT_LINK_INDEX]; + uint16_t m_pixelModuleInfo[MAX_PIX_HASH]; + uint16_t m_sctModuleInfo[MAX_SCT_HASH]; + PIXEL_MODULE_GEO_INFO m_pixelGeoArray[MAX_PIX_HASH]; + SCT_MODULE_GEO_INFO m_sctGeoArray[MAX_SCT_HASH]; + } ID_CABLING_INFO; + + struct myfloat2 { + float x,y; + }; + + struct myint4 { + int x,y,z,w; + }; + + struct myushort4 { + unsigned short x,y,z,w; + }; + + struct myushort2 { + unsigned short x,y; + }; + + typedef struct InputByteStreamData { + int m_nDataWords; + uint32_t m_rodIds[MAX_NUMBER_BS_WORDS]; + uint32_t m_words[MAX_NUMBER_BS_WORDS]; + float m_xBeamSpot; + float m_yBeamSpot; + } INPUT_BYTESTREAM_DATA; + + + typedef struct DecodedPixelModuleInfo { + int m_headerPositions; + int m_trailerPositions; + int m_gangedStart; + short m_hashIds; + unsigned int m_nClusters; + unsigned int m_nGangedPixels; + } DECODED_PIX_MODULE_INFO; + + typedef struct DecodedPixelHitInfo { + unsigned int m_clusterIds; + unsigned short m_etaIndex; + unsigned short m_phiIndex; + unsigned int m_tot; + } DECODED_PIX_HIT_INFO; + + typedef struct PixelSpacePointType { + float m_position[3]; + unsigned short m_clusterIdx; + } PIXEL_SPACEPOINT_TYPE; + + + typedef struct DecodedPixelData { + public: + int m_nHeaders, m_nTrailers; + int m_nSpacePoints; + + int m_gangedPixelsStart;//initial value = m_nDataWords + int m_nPixels[MAX_PIX_BS_HEADERS];//number of decoded pixels (counting ganged) per module + int m_clusterStarts[MAX_PIX_BS_HEADERS];//for navigation through cluster and SP storage space + DECODED_PIX_MODULE_INFO m_modulesInfo[MAX_PIX_BS_HEADERS]; + unsigned short m_moduleInfoWord[MAX_PIX_BS_HEADERS]; + myfloat2 m_clusterPosition[MAX_NUMBER_BS_WORDS+MAX_GANGED_PIXELS]; + unsigned short m_clusterId[MAX_NUMBER_BS_WORDS+MAX_GANGED_PIXELS]; + DECODED_PIX_HIT_INFO m_hitInfo[MAX_NUMBER_BS_WORDS+MAX_GANGED_PIXELS]; + myint4 m_decodedData[MAX_NUMBER_BS_WORDS+MAX_GANGED_PIXELS]; + + PIXEL_SPACEPOINT_TYPE m_spacePoints[MAX_NUMBER_BS_WORDS+MAX_GANGED_PIXELS]; + + int m_hashToIndex[MAX_PIX_HASH]; + + } DECODED_PIXEL_DATA; + + typedef struct DecodedSctHeaderInfo { + int m_headerPositions; + int m_rdoEnd; + short m_hashIds; + unsigned int m_nClusters; + bool m_condensedMode; + + unsigned int m_nSP; + unsigned int m_spacePointsStart; + } DECODED_SCT_HEADER_INFO; + + typedef struct SctSpacePointStruct { + float m_position[3]; + unsigned short m_clusterIdx[2]; + } SCT_SPACEPOINT_TYPE; + + typedef struct DecodedSctData { + int m_nHeaders; + int m_nTrailers; + int m_nPhiModules; + unsigned int m_nSpacePoints; + + DECODED_SCT_HEADER_INFO m_headersInfo[MAX_SCT_BS_HEADERS]; + unsigned short m_moduleInfoWord[MAX_SCT_BS_HEADERS]; + unsigned short m_headerInfoIndices[MAX_SCT_HASH];//starts from 1 !!! + unsigned short m_phiModuleIndices[MAX_SCT_BS_HEADERS/2]; + unsigned short m_clusterIds[MAX_NUMBER_BS_WORDS*2]; + float m_clusterPosition[MAX_NUMBER_BS_WORDS*2]; + SCT_SPACEPOINT_TYPE m_spacePoints[SCT_MAX_SP_PER_MODULE*MAX_SCT_BS_HEADERS/2]; + myushort4 m_decodedData[MAX_NUMBER_BS_WORDS*2]; + myushort2 m_clusterInfo[MAX_NUMBER_BS_WORDS*2]; + + } DECODED_SCT_DATA; + +} + +#endif diff --git a/Trigger/TrigAccel/TrigAccelEvent/TrigAccelEvent/Work.h b/Trigger/TrigAccel/TrigAccelEvent/TrigAccelEvent/Work.h new file mode 100644 index 0000000000000000000000000000000000000000..f51a73ddd5d63456647061d0e43cdda1e1fbb00a --- /dev/null +++ b/Trigger/TrigAccel/TrigAccelEvent/TrigAccelEvent/Work.h @@ -0,0 +1,27 @@ +/* + Copyright (C) 2002-2020 CERN for the benefit of the ATLAS collaboration +*/ + +#ifndef TRIGACCELEVENT_WORK_H +#define TRIGACCELEVENT_WORK_H + +#include<memory> + +#include "OffloadBuffer.h" + +namespace TrigAccel { + + class Work { + + public: + + Work() {}; + virtual ~Work() {}; + virtual std::shared_ptr<OffloadBuffer> getOutput() = 0; + virtual bool run() = 0; + virtual unsigned int getId() const = 0; + + }; +} + +#endif diff --git a/Trigger/TrigAccel/TrigAccelEvent/TrigAccelEvent/WorkFactory.h b/Trigger/TrigAccel/TrigAccelEvent/TrigAccelEvent/WorkFactory.h new file mode 100644 index 0000000000000000000000000000000000000000..077318d166b2585a68c4a35006c450e7c3d59ccb --- /dev/null +++ b/Trigger/TrigAccel/TrigAccelEvent/TrigAccelEvent/WorkFactory.h @@ -0,0 +1,28 @@ +/* + Copyright (C) 2002-2020 CERN for the benefit of the ATLAS collaboration +*/ + +#ifndef TRIGACCELEVENT_WORKFACTORY_H +#define TRIGACCELEVENT_WORKFACTORY_H + +#include <iostream> +#include <vector> +#include <memory> + +#include "Work.h" + +namespace TrigAccel { + + class WorkFactory{ + public: + WorkFactory(){}; + virtual ~WorkFactory(){}; + virtual bool configure() = 0; + virtual Work* createWork(int, std::shared_ptr<OffloadBuffer> data) = 0; + virtual const std::vector<int> getProvidedAlgs() = 0; + virtual int getFactoryId() = 0; + }; + +} + +#endif diff --git a/Trigger/TrigAccel/TrigGpuTest/CMakeLists.txt b/Trigger/TrigAccel/TrigGpuTest/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..a66bca42ff1b1832232380b56cfe3f8c7fc5929c --- /dev/null +++ b/Trigger/TrigAccel/TrigGpuTest/CMakeLists.txt @@ -0,0 +1,19 @@ +################################################################################ +# Package: TrigGpuTest +################################################################################ + +# Copyright (C) 2002-2020 CERN for the benefit of the ATLAS collaboration + +link_libraries(stdc++fs) + +# Set the name of the package. +atlas_subdir( TrigGpuTest ) + +# External dependencies: + +find_package(TBB) + +atlas_add_executable( trigGpuTest + src/*.cxx + INCLUDE_DIRS ${TBB_INCLUDE_DIRS} + LINK_LIBRARIES TrigAccelEvent ${TBB_LIBRARIES} rt dl) diff --git a/Trigger/TrigAccel/TrigGpuTest/src/trigGpuTest.cxx b/Trigger/TrigAccel/TrigGpuTest/src/trigGpuTest.cxx new file mode 100644 index 0000000000000000000000000000000000000000..83dca524a5a2ac6f215648c5becad1c3729b5c05 --- /dev/null +++ b/Trigger/TrigAccel/TrigGpuTest/src/trigGpuTest.cxx @@ -0,0 +1,155 @@ +/* + Copyright (C) 2002-2020 CERN for the benefit of the ATLAS collaboration +*/ + +#include <iostream> +#include <fstream> +#include <cstring> +#include <dlfcn.h> + +#include <experimental/filesystem> +#include "tbb/tick_count.h" + +#include "TrigAccelEvent/WorkFactory.h" +#include "TrigAccelEvent/DataExportBuffer.h" +#include "TrigAccelEvent/TrigInDetAccelEDM.h" +#include "TrigAccelEvent/TrigInDetAccelCodes.h" + +#include <vector> +#include <memory> + +int main(int argc, char* argv[]) { + if(argc < 4) { + std::cout<<"trigGpuTest usage: ./trigGpuTest <geo_file.bin> <data_dir> nevents"<<std::endl; + exit(0); + } + + + //open the factory library + + void* handle = dlopen("libTrigInDetCUDA.so", RTLD_LAZY); + + if(!handle) { + fprintf(stderr, "cannot load the factory library : %s\n", dlerror()); + exit(EXIT_FAILURE); + } + + dlerror(); + + //declare library interface methods + + TrigAccel::WorkFactory* (*getFactory)(); + int (*getFactoryId)(); + void (*deleteFactory)(TrigAccel::WorkFactory*); + + getFactory = (TrigAccel::WorkFactory* (*)()) dlsym(handle, "getFactory"); + getFactoryId = (int (*)()) dlsym(handle, "getFactoryId"); + deleteFactory = (void (*)(TrigAccel::WorkFactory*)) dlsym(handle, "deleteFactory"); + + std::cout<<"factory library id = "<<std::hex<<getFactoryId()<<std::dec<<std::endl; + + TrigAccel::WorkFactory* pW = getFactory(); + + bool cfgResult = pW->configure(); + + if(!cfgResult) { + std::cout<<"Factory config failed"<<std::endl; + exit(-2); + } + + + const size_t bufferOffset = 256; + + TrigAccel::DATA_EXPORT_BUFFER* pBG = new TrigAccel::DATA_EXPORT_BUFFER(); + + std::string geoName(argv[1]); + + std::cout<<"reading geometry from file "<<geoName<<std::endl; + + size_t bSize = pBG->load(geoName); + + std::cout<<"loaded "<<bSize<<" bytes"<<std::endl; + + std::shared_ptr<TrigAccel::OffloadBuffer> pDMBuff = std::make_shared<TrigAccel::OffloadBuffer>(pBG); + + delete pBG; + + std::cout<<"Creating Work item for task "<<TrigAccel::InDetJobControlCode::SIL_LAYERS_EXPORT<<std::endl; + + pW->createWork(TrigAccel::InDetJobControlCode::SIL_LAYERS_EXPORT, pDMBuff); + + + std::string data_path(argv[2]); + std::vector<std::string> event_files; + + for(const auto& entry : std::experimental::filesystem::directory_iterator(data_path)) { + event_files.push_back(entry.path()); + } + + int nEvents = atoi(argv[3]); + + std::cout<<"running the GPU test with "<<nEvents<<" events"<<std::endl; + + TrigAccel::DATA_EXPORT_BUFFER* pB = new TrigAccel::DATA_EXPORT_BUFFER(); + + int fileIdx = 0; + + std::ofstream timeFile("results.csv"); + + timeFile<<"nsp,nseeds,time"<<std::endl; + + for(int iEvent=0;iEvent<nEvents;iEvent++) { + + const std::string& fileName = event_files[fileIdx]; + + fileIdx++; + if(fileIdx >= (int)event_files.size()) { + fileIdx = 0; + } + + std::cout<<"reading event from file "<<fileName<<std::endl; + + bSize = pB->load(fileName); + + TrigAccel::SEED_MAKING_JOB* pJ = reinterpret_cast<TrigAccel::SEED_MAKING_JOB*>(pB->m_buffer + bufferOffset); + + TrigAccel::SPACEPOINT_STORAGE& sps = pJ->m_data; + + tbb::tick_count tzero = tbb::tick_count::now(); + + std::shared_ptr<TrigAccel::OffloadBuffer> pBuff = std::make_shared<TrigAccel::OffloadBuffer>(pB); + + TrigAccel::Work* pJob = pW->createWork(TrigAccel::InDetJobControlCode::MAKE_SEEDS, pBuff); + + if(!pJob) { + std::cout<<"ERROR: cannot create work item"<<std::endl; + exit(-3); + } + + pJob->run(); + + tbb::tick_count tnow=tbb::tick_count::now(); + tbb::tick_count::interval_t duration = tnow - tzero; + std::cout<<"triplet making took "<<duration.seconds()*1000.0<<" ms"<<std::endl; + + std::shared_ptr<TrigAccel::OffloadBuffer> pOB = pJob->getOutput(); + + TrigAccel::OUTPUT_SEED_STORAGE* pOutput = reinterpret_cast<TrigAccel::OUTPUT_SEED_STORAGE *>(pOB->m_rawBuffer); + + std::cout<<"Found "<<pOutput->m_nSeeds<<" triplets"<<std::endl; + timeFile<<sps.m_nSpacepoints<<","<<pOutput->m_nSeeds<<","<<duration.seconds()*1000.0<<std::endl; + + delete pJob; + } + + timeFile.close(); + + delete pB; + + deleteFactory(pW); + + dlclose(handle); + + + +} diff --git a/Trigger/TrigAccel/TrigInDetAccel/TrigInDetAccelerationService/CMakeLists.txt b/Trigger/TrigAccel/TrigInDetAccel/TrigInDetAccelerationService/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..d7b7fa10044d639d9e12f6055dc0bad32e58591b --- /dev/null +++ b/Trigger/TrigAccel/TrigInDetAccel/TrigInDetAccelerationService/CMakeLists.txt @@ -0,0 +1,26 @@ +################################################################################ +# Package: TrigInDetAccelerationService +################################################################################ + +# Copyright (C) 2002-2020 CERN for the benefit of the ATLAS collaboration + +# Declare the package name: + +atlas_subdir( TrigInDetAccelerationService ) + +# External dependencies: + +find_package( TBB ) + +# Component(s) in the package: +atlas_add_library( TrigInDetAccelerationServiceLib + src/*.cxx + PUBLIC_HEADERS TrigInDetAccelerationService + INCLUDE_DIRS ${TBB_INCLUDE_DIRS} + LINK_LIBRARIES AthenaKernel GaudiKernel AthenaBaseComps StoreGateLib InDetIdentifier PixelReadoutGeometry SCT_ReadoutGeometry TrigAccelEvent ${TBB_LIBRARIES} rt dl) + +atlas_add_component( TrigInDetAccelerationService + src/components/*.cxx + LINK_LIBRARIES TrigInDetAccelerationServiceLib AthenaKernel GaudiKernel AthenaBaseComps StoreGateLib TrigAccelEvent ${TBB_LIBRARIES} rt dl) + +atlas_install_python_modules( python/*.py ) diff --git a/Trigger/TrigAccel/TrigInDetAccel/TrigInDetAccelerationService/TrigInDetAccelerationService/ITrigInDetAccelerationSvc.h b/Trigger/TrigAccel/TrigInDetAccel/TrigInDetAccelerationService/TrigInDetAccelerationService/ITrigInDetAccelerationSvc.h new file mode 100644 index 0000000000000000000000000000000000000000..be940bba0cdf3c1bb5890cd26ee1d393dd6d98ed --- /dev/null +++ b/Trigger/TrigAccel/TrigInDetAccel/TrigInDetAccelerationService/TrigInDetAccelerationService/ITrigInDetAccelerationSvc.h @@ -0,0 +1,38 @@ +/* + Copyright (C) 2002-2020 CERN for the benefit of the ATLAS collaboration +*/ + +#ifndef TRIGINDETACCELERATIONSERVICE_ITRIGINDETACCELERATIONSVC_H +#define TRIGINDETACCELERATIONSERVICE_ITRIGINDETACCELERATIONSVC_H + +#include "GaudiKernel/IService.h" + +#include "TrigAccelEvent/Work.h" +#include "TrigAccelEvent/OffloadBuffer.h" + +#include<memory> +#include<vector> + + +/// Service Interface for TrigInDetAccelerationSvc class + +class ITrigInDetAccelerationSvc : virtual public IService { + + public: + + /// Interface ID + DeclareInterfaceID(ITrigInDetAccelerationSvc,1,0); + + // main methods + + virtual bool isReady() const = 0; + virtual TrigAccel::Work* createWork(unsigned int, std::shared_ptr<TrigAccel::OffloadBuffer>) const = 0; + + //helper + + virtual const std::vector<short>& getLayerInformation(int) const = 0; + +}; + +#endif + diff --git a/Trigger/TrigAccel/TrigInDetAccel/TrigInDetAccelerationService/python/TrigInDetAccelerationSvcDefault.py b/Trigger/TrigAccel/TrigInDetAccel/TrigInDetAccelerationService/python/TrigInDetAccelerationSvcDefault.py new file mode 100644 index 0000000000000000000000000000000000000000..1a5a9f3ed74eb83a29811dc93e9f3c70b885f5b6 --- /dev/null +++ b/Trigger/TrigAccel/TrigInDetAccel/TrigInDetAccelerationService/python/TrigInDetAccelerationSvcDefault.py @@ -0,0 +1,18 @@ +# Copyright (C) 2002-2020 CERN for the benefit of the ATLAS collaboration + +# default configuration of TrigInDetAccelerationSvc + +# import the base class + +from TrigInDetAccelerationService.TrigInDetAccelerationServiceConf import TrigInDetAccelerationSvc + + +class TrigInDetAccelerationSvcDefault( TrigInDetAccelerationSvc ) : + + def __init__(self, name="TrigInDetAccelerationSvcDefault"): + # call base class constructor + TrigInDetAccelerationSvc.__init__( self, name="TrigInDetAccelerationSvc") + self.NumberOfDCs = 8 + + + diff --git a/Trigger/TrigAccel/TrigInDetAccel/TrigInDetAccelerationService/src/TrigInDetAccelerationSvc.cxx b/Trigger/TrigAccel/TrigInDetAccel/TrigInDetAccelerationService/src/TrigInDetAccelerationSvc.cxx new file mode 100644 index 0000000000000000000000000000000000000000..8b6804fce4e06b02a1c98bef014e8ab9784182d2 --- /dev/null +++ b/Trigger/TrigAccel/TrigInDetAccel/TrigInDetAccelerationService/src/TrigInDetAccelerationSvc.cxx @@ -0,0 +1,364 @@ +/* + Copyright (C) 2002-2020 CERN for the benefit of the ATLAS collaboration +*/ + +#include "InDetIdentifier/SCT_ID.h" +#include "InDetIdentifier/PixelID.h" + +#include "SCT_ReadoutGeometry/SCT_DetectorManager.h" +#include "PixelReadoutGeometry/PixelDetectorManager.h" +#include "InDetReadoutGeometry/SiNumerology.h" +#include "InDetReadoutGeometry/SiDetectorElement.h" + +#include "TrigInDetAccelerationSvc.h" +#include "TrigAccelEvent/TrigInDetAccelCodes.h" +#include "TrigAccelEvent/TrigInDetAccelEDM.h" + +#include <dlfcn.h> + +////////// +/// Constructor +///// + +TrigInDetAccelerationSvc::TrigInDetAccelerationSvc( const std::string& name, ISvcLocator* pSvcLocator) : + base_class( name, pSvcLocator ), + m_nDCs(12), + m_moduleName("libTrigInDetCUDA.so"), + m_libHandle(0), + m_pWF(0), + m_detStore("DetectorStore", name), + m_evtStore("StoreGateSvc",name), + m_factoryConfigured(false) { + + declareProperty( "NumberOfDCs", m_nDCs = 8 ); + declareProperty( "ModuleName", m_moduleName = "libTrigInDetCUDA.so"); +} + + +////////// +/// Initialize +///// + +StatusCode TrigInDetAccelerationSvc::initialize() { + + ATH_MSG_INFO("TrigInDetAccelerationSvc: initialize"); + + ATH_CHECK (m_evtStore.retrieve() ); + ATH_CHECK (m_detStore.retrieve() ); + + //load the OffloadFactory library + + m_libHandle = dlopen(m_moduleName.c_str(), RTLD_LAZY); + + if(!m_libHandle) { + ATH_MSG_INFO("TrigInDetAccelerationSvc: cannot load the factory library, error:"<<dlerror()); + return StatusCode::SUCCESS; + } + + dlerror(); + + //declare library interface methods + + TrigAccel::WorkFactory* (*getFactory)(); + + getFactory = (TrigAccel::WorkFactory* (*)()) dlsym(m_libHandle, "getFactory"); + + m_pWF = getFactory(); + + bool cfgResult = m_pWF->configure(); + + if(!cfgResult) { + ATH_MSG_INFO("OffloadFactory config failed"); + m_factoryConfigured = false; + return StatusCode::SUCCESS; + } + + m_factoryConfigured = true; + + for(int i=0;i<3;i++) m_layerInfo[i].clear(); + + ATH_MSG_INFO("TrigInDetAccelerationSvc: created OffloadFactory, factory id = "<<std::hex<<m_pWF->getFactoryId()<<std::dec); + + /* + * Ask to be informed at the beginning of a new run so that we + * can collect geometry, conditions, etc. and copy them to on-GPU data structures + */ + + IIncidentSvc* incsvc; + StatusCode sc = service("IncidentSvc", incsvc); + int priority = 100; + if( sc.isSuccess() ) { + incsvc->addListener( this, "BeginRun", priority); + } + + + return StatusCode::SUCCESS; +} + +///////// +/// Finalize +///// +StatusCode TrigInDetAccelerationSvc::finalize() { + + void (*deleteFactory)(TrigAccel::WorkFactory*); + deleteFactory = (void (*)(TrigAccel::WorkFactory*)) dlsym(m_libHandle, "deleteFactory"); + + deleteFactory(m_pWF); + + dlclose(m_libHandle); + + return StatusCode::SUCCESS; +} + + +///////// +/// OnBeginRun data gathering and export +///// + +void TrigInDetAccelerationSvc::handle(const Incident&) { + + ATH_MSG_INFO("OnBeginRun "); + + std::map<std::tuple<short,short,short>,std::vector<PhiEtaHash> > hashMap; + + if(!extractGeometryInformation(hashMap)) { + ATH_MSG_INFO("Geometry extraction failed"); + return; + } + + if(!exportGeometryInformation(hashMap)) { + ATH_MSG_INFO("Geometry export failed"); + } + + return; +} + +TrigAccel::Work* TrigInDetAccelerationSvc::createWork(unsigned int jobCode, std::shared_ptr<TrigAccel::OffloadBuffer> pB) const { + + if(!m_factoryConfigured) return 0; + + return m_pWF->createWork(jobCode, pB); + +} + + +const std::vector<short>& TrigInDetAccelerationSvc::getLayerInformation(int i) const { + if(i<0 || i>2) i = 0; + return m_layerInfo[i]; +} + + +bool TrigInDetAccelerationSvc::exportGeometryInformation(const std::map<std::tuple<short,short,short>,std::vector<PhiEtaHash> >& hashMap) const { + + const InDetDD::SCT_DetectorManager * sct_mgr; + const InDetDD::PixelDetectorManager * pix_mgr; + + if (m_detStore->retrieve(sct_mgr, "SCT").isFailure()) { + ATH_MSG_WARNING("failed to get SCT Manager"); + return false; + + } + if (m_detStore->retrieve(pix_mgr,"Pixel").isFailure()) { + ATH_MSG_WARNING("failed to get SCT Manager"); + return false; + } + + //export layer structure + + size_t dataTypeSize = sizeof(TrigAccel::DETECTOR_MODEL); + const size_t bufferOffset = 256; + size_t totalSize = bufferOffset + dataTypeSize; + + TrigAccel::DATA_EXPORT_BUFFER* pBG = new TrigAccel::DATA_EXPORT_BUFFER(5000); + + if(!pBG->fit(totalSize)) pBG->reallocate(totalSize); + + TrigAccel::DETECTOR_MODEL* pArray = reinterpret_cast<TrigAccel::DETECTOR_MODEL*>(pBG->m_buffer + bufferOffset); + + memset(pArray,0,dataTypeSize); + + int nLayers = (int)hashMap.size(); + pArray->m_nLayers = nLayers; + pArray->m_nModules=0; + + int layerIdx=0; + + for(std::map<std::tuple<short,short,short>,std::vector<PhiEtaHash> >::const_iterator it = hashMap.begin();it!=hashMap.end();++it, layerIdx++) { + short subdetid = std::get<0>((*it).first); + short barrel_ec = std::get<1>((*it).first); + + pArray->m_layers[layerIdx].m_nElements = 0; + pArray->m_layers[layerIdx].m_subdet = subdetid; + pArray->m_layers[layerIdx].m_type = barrel_ec; + + std::vector<std::vector<PhiEtaHash>::const_iterator> vStops; + vStops.push_back((*it).second.begin()); + std::vector<PhiEtaHash>::const_iterator firstIt = (*it).second.begin(); + std::vector<PhiEtaHash>::const_iterator nextIt = (*it).second.begin(); + ++nextIt; + + int nPhiSlices=0; + + for(; nextIt!=(*it).second.end();++nextIt, ++firstIt) { + if((*nextIt).m_phiIndex!=(*firstIt).m_phiIndex) { + vStops.push_back(nextIt); + nPhiSlices++; + } + } + + nPhiSlices++; + + vStops.push_back((*it).second.end()); + + float rc=0.0; + float minBound = 100000.0; + float maxBound =-100000.0; + + pArray->m_layers[layerIdx].m_nPhiSlices = nPhiSlices; + + //3. iterating over phi sectors + + for(unsigned int iStops = 1; iStops<vStops.size();iStops++) { + + int nPhiModules = 0; + + bool first = true; + + for(std::vector<PhiEtaHash>::const_iterator hIt = vStops[iStops-1];hIt!=vStops[iStops];++hIt, nPhiModules++) { + + pArray->m_hashArray[pArray->m_nModules] = (*hIt).m_hash; + + const InDetDD::SiDetectorElement *p = (subdetid==1) ? pix_mgr->getDetectorElement((*hIt).m_hash) : sct_mgr->getDetectorElement((*hIt).m_hash); + + if(first) { + first = false; + } + + pArray->m_layers[layerIdx].m_nElements++; + + const Amg::Vector3D& C = p->center(); + if(barrel_ec == 0) { + rc += sqrt(C(0)*C(0)+C(1)*C(1)); + if(p->zMin() < minBound) minBound = p->zMin(); + if(p->zMax() > maxBound) maxBound = p->zMax(); + pArray->m_minRZ[pArray->m_nModules] = p->zMin(); + pArray->m_maxRZ[pArray->m_nModules] = p->zMax(); + } + else { + rc += C(2); + if(p->rMin() < minBound) minBound = p->rMin(); + if(p->rMax() > maxBound) maxBound = p->rMax(); + pArray->m_minRZ[pArray->m_nModules] = p->rMin(); + pArray->m_maxRZ[pArray->m_nModules] = p->rMax(); + } + + pArray->m_nModules++; + } + + } + pArray->m_layers[layerIdx].m_refCoord = rc/pArray->m_layers[layerIdx].m_nElements; + pArray->m_layers[layerIdx].m_minBound = minBound; + pArray->m_layers[layerIdx].m_maxBound = maxBound; + } + + std::shared_ptr<TrigAccel::OffloadBuffer> pDMBuff = std::make_shared<TrigAccel::OffloadBuffer>(pBG); + + delete pBG; + + ATH_MSG_INFO("Creating Work item for task "<<TrigAccel::InDetJobControlCode::SIL_LAYERS_EXPORT); + + TrigAccel::Work* pW = createWork(TrigAccel::InDetJobControlCode::SIL_LAYERS_EXPORT, pDMBuff); + + return pW == 0;//request is actioned immediately, no actual WorkItem is created + +} + + + +bool TrigInDetAccelerationSvc::extractGeometryInformation(std::map<std::tuple<short,short,short>, std::vector<PhiEtaHash> >& hashMap) { + + const PixelID* pixelId; + const SCT_ID* sctId; + + if (m_detStore->retrieve(pixelId, "PixelID").isFailure()) { + ATH_MSG_WARNING("Could not get Pixel ID helper"); + return false; + } + + if (m_detStore->retrieve(sctId, "SCT_ID").isFailure()) { + ATH_MSG_WARNING("Could not get Pixel ID helper"); + return false; + } + + short subdetid = 1; + + for(int hash = 0; hash<(int)pixelId->wafer_hash_max(); hash++) { + + Identifier offlineId = pixelId->wafer_id(hash); + + if(offlineId==0) continue; + + short barrel_ec = pixelId->barrel_ec(offlineId); + if(abs(barrel_ec)>2) continue;//no DBM needed + short layer_disk = pixelId->layer_disk(offlineId); + short phi_index = pixelId->phi_module(offlineId); + short eta_index = pixelId->eta_module(offlineId); + auto t = std::make_tuple(subdetid, barrel_ec, layer_disk); + std::map<std::tuple<short,short,short>,std::vector<PhiEtaHash> >::iterator it = hashMap.find(t); + if(it==hashMap.end()) + hashMap.insert(std::pair<std::tuple<short,short,short>,std::vector<PhiEtaHash> >(t,std::vector<PhiEtaHash>(1, PhiEtaHash(phi_index, eta_index, hash) ))); + else (*it).second.push_back(PhiEtaHash(phi_index, eta_index, hash)); + } + subdetid = 2; + for(int hash = 0; hash<(int)sctId->wafer_hash_max(); hash++) { + + Identifier offlineId = sctId->wafer_id(hash); + + if(offlineId==0) continue; + + short barrel_ec = sctId->barrel_ec(offlineId); + short layer_disk = sctId->layer_disk(offlineId); + short phi_index = sctId->phi_module(offlineId); + short eta_index = sctId->eta_module(offlineId); + + auto t = std::make_tuple(subdetid, barrel_ec, layer_disk); + std::map<std::tuple<short,short,short>,std::vector<PhiEtaHash> >::iterator it = hashMap.find(t); + if(it==hashMap.end()) + hashMap.insert(std::pair<std::tuple<short,short,short>,std::vector<PhiEtaHash> >(t,std::vector<PhiEtaHash>(1, PhiEtaHash(phi_index, eta_index, hash)))); + else (*it).second.push_back(PhiEtaHash(phi_index, eta_index, hash)); + } + + m_layerInfo[1].resize(pixelId->wafer_hash_max(), -1); + m_layerInfo[2].resize(sctId->wafer_hash_max(), -1); + + int layerId=0; + + for(std::map<std::tuple<short,short,short>,std::vector<PhiEtaHash> >::iterator it = hashMap.begin();it!=hashMap.end();++it, layerId++) { + + short subdetId = std::get<0>((*it).first); + short barrel_ec = std::get<1>((*it).first); + + m_layerInfo[0].push_back(barrel_ec); + + if(subdetId == 1) {//pixel + for(std::vector<PhiEtaHash>::iterator hIt = (*it).second.begin();hIt != (*it).second.end();++hIt) { + m_layerInfo[subdetId][(*hIt).m_hash] = layerId; + } + } + if(subdetId == 2) {//SCT + for(std::vector<PhiEtaHash>::iterator hIt = (*it).second.begin();hIt != (*it).second.end();++hIt) { + m_layerInfo[subdetId][(*hIt).m_hash] = layerId; + } + } + } + + + for(std::map<std::tuple<short,short,short>,std::vector<PhiEtaHash> >::iterator it = hashMap.begin();it!=hashMap.end();++it) { + + //sorting along phi first, then along eta + + std::sort((*it).second.begin(), (*it).second.end(), PhiEtaHash::compare()); + + } + return true; +} diff --git a/Trigger/TrigAccel/TrigInDetAccel/TrigInDetAccelerationService/src/TrigInDetAccelerationSvc.h b/Trigger/TrigAccel/TrigInDetAccel/TrigInDetAccelerationService/src/TrigInDetAccelerationSvc.h new file mode 100644 index 0000000000000000000000000000000000000000..8975aa184a0ce46ef94ede050485648d881f93bb --- /dev/null +++ b/Trigger/TrigAccel/TrigInDetAccel/TrigInDetAccelerationService/src/TrigInDetAccelerationSvc.h @@ -0,0 +1,89 @@ +/* + Copyright (C) 2002-2020 CERN for the benefit of the ATLAS collaboration +*/ + +#ifndef TRIGINDETACCELERATIONSERVICE_TRIGINDETACCELERATIONSVC_H +#define TRIGINDETACCELERATIONSERVICE_TRIGINDETACCELERATIONSVC_H + + +#include "GaudiKernel/IIncidentListener.h" +#include "GaudiKernel/IIncidentSvc.h" + +#include "TrigInDetAccelerationService/ITrigInDetAccelerationSvc.h" + +#include "AthenaBaseComps/AthService.h" + +#include "GaudiKernel/ServiceHandle.h" + +#include "StoreGate/StoreGateSvc.h" + +#include "TrigAccelEvent/WorkFactory.h" + +#include<vector> +#include<map> +#include<cstring> + +/// Service for creating GPU-accelerated Work items for HLT ID algorithms + +class TrigInDetAccelerationSvc : public extends<AthService, ITrigInDetAccelerationSvc, IIncidentListener> { + + public: + + TrigInDetAccelerationSvc( const std::string&, ISvcLocator*); + + virtual ~TrigInDetAccelerationSvc() override {}; + + virtual StatusCode initialize() override; + virtual StatusCode finalize() override; + + virtual void handle(const Incident&) override; + + virtual bool isReady() const override { + return m_factoryConfigured; + } + + virtual TrigAccel::Work* createWork(unsigned int, std::shared_ptr<TrigAccel::OffloadBuffer>) const override; + virtual const std::vector<short>& getLayerInformation(int) const override; + + private: + + struct PhiEtaHash { + + struct compare { + public: + bool operator()(const struct PhiEtaHash& p1, const struct PhiEtaHash& p2) { + if(p1.m_phiIndex == p2.m_phiIndex) { + return p1.m_etaIndex < p2.m_etaIndex; + } + else { + return p1.m_phiIndex < p2.m_phiIndex; + } + } + }; + + public: + PhiEtaHash(short phi, short eta, int hash) : m_phiIndex(phi), m_etaIndex(eta), m_hash(hash) {}; + PhiEtaHash(const PhiEtaHash& p) : m_phiIndex(p.m_phiIndex), m_etaIndex(p.m_etaIndex), m_hash(p.m_hash) {}; + short m_phiIndex, m_etaIndex; + int m_hash; + }; + + int m_nDCs; + std::string m_moduleName; + void* m_libHandle; //for OffloadFactory + TrigAccel::WorkFactory* m_pWF; + ServiceHandle<StoreGateSvc> m_detStore; + ServiceHandle<StoreGateSvc> m_evtStore; + + bool m_factoryConfigured; + + bool exportGeometryInformation(const std::map<std::tuple<short,short,short>, std::vector<PhiEtaHash> >&) const; + bool extractGeometryInformation(std::map<std::tuple<short,short,short>, std::vector<PhiEtaHash> >&); + + std::vector<short> m_layerInfo[3]; + +}; + + + +#endif diff --git a/Trigger/TrigAccel/TrigInDetAccel/TrigInDetAccelerationService/src/components/TrigInDetAccelerationService_entries.cxx b/Trigger/TrigAccel/TrigInDetAccel/TrigInDetAccelerationService/src/components/TrigInDetAccelerationService_entries.cxx new file mode 100644 index 0000000000000000000000000000000000000000..b9e792b384e2bfe549b65e4ad28a320bbce70f61 --- /dev/null +++ b/Trigger/TrigAccel/TrigInDetAccel/TrigInDetAccelerationService/src/components/TrigInDetAccelerationService_entries.cxx @@ -0,0 +1,9 @@ +/* + Copyright (C) 2002-2020 CERN for the benefit of the ATLAS collaboration +*/ + +#include "../TrigInDetAccelerationSvc.h" + +DECLARE_COMPONENT( TrigInDetAccelerationSvc ) + + diff --git a/Trigger/TrigAccel/TrigInDetAccel/TrigInDetAccelerationTool/CMakeLists.txt b/Trigger/TrigAccel/TrigInDetAccel/TrigInDetAccelerationTool/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..e4e0468d9dfb5ec2610ff189f9f81e31c7353d5c --- /dev/null +++ b/Trigger/TrigAccel/TrigInDetAccel/TrigInDetAccelerationTool/CMakeLists.txt @@ -0,0 +1,18 @@ +################################################################################ +# Package: TrigInDetAccelerationTool +################################################################################ + +# Copyright (C) 2002-2020 CERN for the benefit of the ATLAS collaboration + +# Declare the package name: +atlas_subdir( TrigInDetAccelerationTool ) + +# Component(s) in the package: +atlas_add_component( TrigInDetAccelerationTool + src/*.cxx + src/components/*.cxx + LINK_LIBRARIES AthenaBaseComps GaudiKernel InDetIdentifier TrigInDetEvent TrigSteeringEvent TrigInDetPattRecoTools AthenaBaseComps InDetIdentifier TrigInDetPattRecoEvent TrigAccelEvent TrigInDetAccelerationServiceLib) + +# Install files from the package: +atlas_install_headers( TrigInDetAccelerationTool ) + diff --git a/Trigger/TrigAccel/TrigInDetAccel/TrigInDetAccelerationTool/TrigInDetAccelerationTool/ITrigInDetAccelerationTool.h b/Trigger/TrigAccel/TrigInDetAccel/TrigInDetAccelerationTool/TrigInDetAccelerationTool/ITrigInDetAccelerationTool.h new file mode 100644 index 0000000000000000000000000000000000000000..6023fea67bbbc2eedfb2dd6d80f2d7e85ace980a --- /dev/null +++ b/Trigger/TrigAccel/TrigInDetAccel/TrigInDetAccelerationTool/TrigInDetAccelerationTool/ITrigInDetAccelerationTool.h @@ -0,0 +1,29 @@ +/* + Copyright (C) 2002-2020 CERN for the benefit of the ATLAS collaboration +*/ + +#ifndef TRIGINDETACCELERATIONTOOL_ITRIGINDETACCELERATIONTOOL_H +#define TRIGINDETACCELERATIONTOOL_ITRIGINDETACCELERATIONTOOL_H + +#include <vector> +#include "GaudiKernel/IAlgTool.h" +#include "TrigSteeringEvent/TrigRoiDescriptor.h" +#include "TrigInDetEvent/TrigSiSpacePointBase.h" +#include "TrigInDetPattRecoTools/TrigCombinatorialSettings.h" +#include "TrigAccelEvent/DataExportBuffer.h" + +static const InterfaceID IID_ITrigInDetAccelerationTool("ITrigInDetAccelerationTool", 1 , 0); + +class ITrigInDetAccelerationTool: virtual public IAlgTool +{ + public: + + static const InterfaceID& interfaceID() { + return IID_ITrigInDetAccelerationTool; + } + + virtual size_t exportSeedMakingJob(const TrigCombinatorialSettings&, const IRoiDescriptor*, const std::vector<TrigSiSpacePointBase>&, TrigAccel::DATA_EXPORT_BUFFER&) const = 0; + +}; + +#endif diff --git a/Trigger/TrigAccel/TrigInDetAccel/TrigInDetAccelerationTool/src/TrigInDetAccelerationTool.cxx b/Trigger/TrigAccel/TrigInDetAccel/TrigInDetAccelerationTool/src/TrigInDetAccelerationTool.cxx new file mode 100644 index 0000000000000000000000000000000000000000..cdc4f5a3d31ab4d989cf99680b4abbc96a2a44ee --- /dev/null +++ b/Trigger/TrigAccel/TrigInDetAccel/TrigInDetAccelerationTool/src/TrigInDetAccelerationTool.cxx @@ -0,0 +1,171 @@ +/* + Copyright (C) 2002-2020 CERN for the benefit of the ATLAS collaboration +*/ + +#include "TrigInDetAccelerationTool.h" +#include "AthenaBaseComps/AthMsgStreamMacros.h" +#include "AthenaBaseComps/AthCheckMacros.h" +#include "TrigAccelEvent/TrigInDetAccelEDM.h" + +TrigInDetAccelerationTool::TrigInDetAccelerationTool(const std::string& t, + const std::string& n, + const IInterface* p ): AthAlgTool(t,n,p), + m_accelSvc("TrigInDetAccelerationSvc",this->name()) { + + declareInterface< ITrigInDetAccelerationTool >( this ); +} + +StatusCode TrigInDetAccelerationTool::initialize() { + + ATH_CHECK(m_accelSvc.retrieve()); + return StatusCode::SUCCESS; +} + +StatusCode TrigInDetAccelerationTool::finalize() { + return StatusCode::SUCCESS; +} + +size_t TrigInDetAccelerationTool::exportSeedMakingJob(const TrigCombinatorialSettings& tcs, const IRoiDescriptor* roi, const std::vector<TrigSiSpacePointBase>& vsp, TrigAccel::DATA_EXPORT_BUFFER& output) const { + + typedef struct SpIndexPair { + public: + + struct compareZ { + public: + bool operator()(const std::pair<int, const TrigSiSpacePointBase*>& p1, const std::pair<int, const TrigSiSpacePointBase*>& p2) { + return p1.second->z() < p2.second->z(); + } + }; + + struct compareR { + public: + bool operator()(const std::pair<int, const TrigSiSpacePointBase*>& p1, const std::pair<int, const TrigSiSpacePointBase*>& p2) { + return p1.second->r() < p2.second->r(); + } + }; + + } SP_INDEX_PAIR; + + + //0. get InDet geometry information + + const std::vector<short>& pixelLayers = m_accelSvc->getLayerInformation(1); + const std::vector<short>& sctLayers = m_accelSvc->getLayerInformation(2); + const std::vector<short>& layerTypes = m_accelSvc->getLayerInformation(0); + + + //1. check buffer size + + size_t dataTypeSize = sizeof(TrigAccel::SEED_MAKING_JOB); + const size_t bufferOffset = 256; + size_t totalSize = bufferOffset+dataTypeSize;//make room for the header + if(!output.fit(totalSize)) output.reallocate(totalSize); + + TrigAccel::SEED_MAKING_JOB* pJ = reinterpret_cast<TrigAccel::SEED_MAKING_JOB*>(output.m_buffer+bufferOffset); + + memset(pJ,0,dataTypeSize); + + TrigAccel::SEED_FINDER_SETTINGS& sfs = pJ->m_settings; + + sfs.m_maxBarrelPix = tcs.m_maxBarrelPix; + sfs.m_minEndcapPix = tcs.m_minEndcapPix; + sfs.m_maxEndcapPix = tcs.m_maxEndcapPix; + sfs.m_maxSiliconLayer = tcs.m_maxSiliconLayer; + + sfs.m_magFieldZ = tcs.m_magFieldZ; + + sfs.m_tripletD0Max = tcs.m_tripletD0Max; + sfs.m_tripletD0_PPS_Max = tcs.m_tripletD0_PPS_Max; + sfs.m_tripletPtMin = tcs.m_tripletPtMin; + sfs.m_tripletDoPSS = tcs.m_tripletDoPSS ? 1 : 0; + sfs.m_doubletFilterRZ = tcs.m_doubletFilterRZ ? 1 : 0; + sfs.m_nMaxPhiSlice = tcs.m_nMaxPhiSlice; + sfs.m_maxTripletBufferLength = tcs.m_maxTripletBufferLength; + sfs.m_isFullScan = 1; + + sfs.m_zedMinus = roi->zedMinus(); + sfs.m_zedPlus = roi->zedPlus(); + + TrigAccel::SPACEPOINT_STORAGE& sps = pJ->m_data; + + unsigned int nSP = vsp.size(); + if(nSP >= TrigAccel::MAX_NUMBER_SPACEPOINTS) { + nSP = TrigAccel::MAX_NUMBER_SPACEPOINTS-1; + ATH_MSG_WARNING("MAX_NUMBER_SPACEPOINTS exceeded, exported data truncated ..."); + } + + //2. arrange spacepoints into phi/Layer bins + + double phiSliceWidth = 2*M_PI/tcs.m_nMaxPhiSlice; + int nLayers = (int) layerTypes.size(); + int nSlices = (int) tcs.m_nMaxPhiSlice; + + std::vector<std::vector<std::pair<int, const TrigSiSpacePointBase*> > > phiLArray; + phiLArray.resize(nLayers*nSlices); + + for(unsigned int i=0;i<nSP;i++) { + const TrigSiSpacePointBase& sp = vsp[i]; + const std::pair<IdentifierHash, IdentifierHash>& els = sp.offlineSpacePoint()->elementIdList(); + + IdentifierHash hashId = els.first; + short layerId = -1; + if(sp.isPixel()) { + layerId = pixelLayers[hashId]; + } else { + layerId = sctLayers[hashId]; + } + int phiIdx = (sp.phi()+M_PI)/phiSliceWidth; + if (phiIdx >= tcs.m_nMaxPhiSlice) { + phiIdx %= tcs.m_nMaxPhiSlice; + } + else if (phiIdx < 0) { + phiIdx += tcs.m_nMaxPhiSlice; + phiIdx %= tcs.m_nMaxPhiSlice; + } + + std::vector<std::pair<int, const TrigSiSpacePointBase*> >& v = phiLArray[layerId + phiIdx*nLayers]; + v.push_back(std::pair<int, const TrigSiSpacePointBase*>(i,&sp));//storing the original index of spacepoint + } + + //sorting spacepoints in accordance with non-ref coordinate + int layerIdx=0; + for(std::vector<short>::const_iterator it = layerTypes.begin();it!=layerTypes.end();++it, layerIdx++) { + short barrel_ec = (*it);//0-barrel, !=0 - endcap + for(int slice = 0;slice<nSlices;slice++) { + std::vector<std::pair<int, const TrigSiSpacePointBase*> >& v = phiLArray[layerIdx + slice*nLayers]; + if(barrel_ec==0) std::sort(v.begin(), v.end(), SP_INDEX_PAIR::compareZ()); + else std::sort(v.begin(), v.end(), SP_INDEX_PAIR::compareR()); + } + } + + sps.m_nSpacepoints = nSP; + sps.m_nPhiSlices = nSlices; + sps.m_nLayers = nLayers; + + int spIdx=0; + for(int slice = 0;slice<nSlices;slice++) { + for(int layer = 0;layer<nLayers;layer++) { + int layerStart = spIdx; + std::vector<std::pair<int, const TrigSiSpacePointBase*> >& v = phiLArray[layer + slice*nLayers]; + for(std::vector<std::pair<int, const TrigSiSpacePointBase*> >::iterator it = v.begin();it!=v.end();++it) { + const TrigSiSpacePointBase* sp = (*it).second; + sps.m_index[spIdx] = (*it).first; + sps.m_type[spIdx] = sp->isPixel() ? 1 : 2; + sps.m_x[spIdx] = sp->x(); + sps.m_y[spIdx] = sp->y(); + sps.m_z[spIdx] = sp->z(); + sps.m_r[spIdx] = sp->r(); + sps.m_phi[spIdx] = sp->phi(); + sps.m_covR[spIdx] = sp->dr()*sp->dr(); + sps.m_covZ[spIdx] = sp->dz()*sp->dz(); + spIdx++; + } + int layerEnd = spIdx; + sps.m_phiSlices[slice].m_layerBegin[layer] = layerStart; + sps.m_phiSlices[slice].m_layerEnd[layer] = layerEnd; + } + } + + return totalSize; + +} diff --git a/Trigger/TrigAccel/TrigInDetAccel/TrigInDetAccelerationTool/src/TrigInDetAccelerationTool.h b/Trigger/TrigAccel/TrigInDetAccel/TrigInDetAccelerationTool/src/TrigInDetAccelerationTool.h new file mode 100644 index 0000000000000000000000000000000000000000..d50de231983d3bcb1dde6363241db91eb7eb0ef1 --- /dev/null +++ b/Trigger/TrigAccel/TrigInDetAccel/TrigInDetAccelerationTool/src/TrigInDetAccelerationTool.h @@ -0,0 +1,30 @@ +/* + Copyright (C) 2002-2020 CERN for the benefit of the ATLAS collaboration +*/ + +#ifndef TRIGINDETACCELERATIONTOOL_TRIGINDETACCELERATIONTOOL_H +#define TRIGINDETACCELERATIONTOOL_TRIGINDETACCELERATIONTOOL_H + +#include "AthenaBaseComps/AthAlgTool.h" +#include "GaudiKernel/ToolHandle.h" +#include "GaudiKernel/ServiceHandle.h" + +#include "TrigInDetAccelerationTool/ITrigInDetAccelerationTool.h" + +#include "TrigInDetAccelerationService/ITrigInDetAccelerationSvc.h" + +class TrigInDetAccelerationTool: public AthAlgTool, virtual public ITrigInDetAccelerationTool { + + public: + TrigInDetAccelerationTool( const std::string&, const std::string&, const IInterface* ); + virtual StatusCode initialize() override; + virtual StatusCode finalize() override; + + size_t virtual exportSeedMakingJob(const TrigCombinatorialSettings&, const IRoiDescriptor*, const std::vector<TrigSiSpacePointBase>&, TrigAccel::DATA_EXPORT_BUFFER&) const override; + + private: + + ServiceHandle<ITrigInDetAccelerationSvc> m_accelSvc; +}; + +#endif diff --git a/Trigger/TrigAccel/TrigInDetAccel/TrigInDetAccelerationTool/src/components/TrigInDetAccelerationTool_entries.cxx b/Trigger/TrigAccel/TrigInDetAccel/TrigInDetAccelerationTool/src/components/TrigInDetAccelerationTool_entries.cxx new file mode 100644 index 0000000000000000000000000000000000000000..89fd2d814a270a1de6ae82bbf539ac6914215a39 --- /dev/null +++ b/Trigger/TrigAccel/TrigInDetAccel/TrigInDetAccelerationTool/src/components/TrigInDetAccelerationTool_entries.cxx @@ -0,0 +1,7 @@ +/* + Copyright (C) 2002-2020 CERN for the benefit of the ATLAS collaboration +*/ + +#include "../TrigInDetAccelerationTool.h" + +DECLARE_COMPONENT( TrigInDetAccelerationTool ) diff --git a/Trigger/TrigAccel/TrigInDetCUDA/CMakeLists.txt b/Trigger/TrigAccel/TrigInDetCUDA/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..946095f976c5377553fa88ba99758999b3334f38 --- /dev/null +++ b/Trigger/TrigAccel/TrigInDetCUDA/CMakeLists.txt @@ -0,0 +1,26 @@ +################################################################################ +# Package: TrigInDetCUDA +################################################################################ + +# Copyright (C) 2002-2020 CERN for the benefit of the ATLAS collaboration + +# Set the name of the package. +atlas_subdir( TrigInDetCUDA ) + +# As long as this CUDA code can't be interpreted by a C++ compiler as well, +# this package is not to be built. +if( NOT CMAKE_CUDA_COMPILER ) + message( STATUS "CUDA not found, TrigInDetCUDA is not built" ) + return() +endif() + +# External dependencies: +find_package( TBB ) + +# Add a component library that has some CUDA code in it. +atlas_add_component( TrigInDetCUDA + src/*.h src/*.cuh src/*.cu + INCLUDE_DIRS ${TBB_INCLUDE_DIRS} + LINK_LIBRARIES TrigAccelEvent ${TBB_LIBRARIES}) + + diff --git a/Trigger/TrigAccel/TrigInDetCUDA/src/DoubletCountingKernelCuda.cuh b/Trigger/TrigAccel/TrigInDetCUDA/src/DoubletCountingKernelCuda.cuh new file mode 100644 index 0000000000000000000000000000000000000000..3a3a1f94824091a6de00e08be4b745722c3abf5a --- /dev/null +++ b/Trigger/TrigAccel/TrigInDetCUDA/src/DoubletCountingKernelCuda.cuh @@ -0,0 +1,160 @@ +/* + Copyright (C) 2002-2020 CERN for the benefit of the ATLAS collaboration +*/ + +#ifndef TRIGINDETCUDA_DOUBLETCOUNTINGKERNELCUDA_CUH +#define TRIGINDETCUDA_DOUBLETCOUNTINGKERNELCUDA_CUH + +#include <cuda_runtime.h> + +#include "SeedMakingDataStructures.h" + +__global__ static void doubletCountingKernel(TrigAccel::SEED_FINDER_SETTINGS* dSettings, + TrigAccel::SPACEPOINT_STORAGE* dSpacepoints, + TrigAccel::DETECTOR_MODEL* dDetModel, + DOUBLET_INFO* d_Info, + int nLayers, int nSlices) { + + __shared__ int spBegin; + __shared__ int spEnd; + __shared__ int nMiddleSPs; + + __shared__ int nInner[NUM_MIDDLE_THREADS]; + __shared__ int nOuter[NUM_MIDDLE_THREADS]; + + const float zTolerance = 3.0; + const float maxEta = 2.7; + const float maxDoubletLength = 200.0; + const float minDoubletLength = 10.0; + const float maxOuterRadius = 550.0; + + const int sliceIdx = blockIdx.x; + const int layerIdx = blockIdx.y; + + if(threadIdx.x == 0 && threadIdx.y == 0) { + const TrigAccel::SPACEPOINT_LAYER_RANGE& slr = dSpacepoints->m_phiSlices[sliceIdx]; + spBegin = slr.m_layerBegin[layerIdx]; + spEnd = slr.m_layerEnd[layerIdx]; + nMiddleSPs = spEnd-spBegin; + } + __syncthreads(); + + if(nMiddleSPs == 0) return; + + const float zMinus = dSettings->m_zedMinus - zTolerance; + const float zPlus = dSettings->m_zedPlus + zTolerance; + const float maxTheta = 2*atan(exp(-maxEta)); + const float maxCtg = cos(maxTheta)/sin(maxTheta); + const bool DoPSS = dSettings->m_tripletDoPSS; + const float minOuterZ = dSettings->m_zedMinus - maxOuterRadius*maxCtg - zTolerance; + const float maxOuterZ = dSettings->m_zedPlus + maxOuterRadius*maxCtg + zTolerance; + + //1. get a tile of middle spacepoints + + for(int spmIdx=threadIdx.x+spBegin;spmIdx<spEnd;spmIdx+=blockDim.x) { + + int spmType = dSpacepoints->m_type[spmIdx]; + float zm = dSpacepoints->m_z[spmIdx]; + float rm = dSpacepoints->m_r[spmIdx]; + + bool isPixel = (spmType == 1); + + if(threadIdx.y ==0) { + nInner[threadIdx.x] = 0; + nOuter[threadIdx.x] = 0; + d_Info->m_nInner[spmIdx] = 0; + d_Info->m_nOuter[spmIdx] = 0; + } + + __syncthreads(); + + //2. loop over other phi-bins / layers + + for(int deltaPhiIdx=-1;deltaPhiIdx<=1;deltaPhiIdx++) { + int nextPhiIdx = sliceIdx + deltaPhiIdx; + if(nextPhiIdx>=nSlices) nextPhiIdx = 0; + if(nextPhiIdx<0) nextPhiIdx=nSlices-1; + const TrigAccel::SPACEPOINT_LAYER_RANGE& next_slr = dSpacepoints->m_phiSlices[nextPhiIdx]; + + for(int nextLayerIdx=0;nextLayerIdx<nLayers;nextLayerIdx++) { + if(nextLayerIdx == layerIdx) continue; + + int next_spBegin = next_slr.m_layerBegin[nextLayerIdx]; + int next_spEnd = next_slr.m_layerEnd[nextLayerIdx]; + + if(next_spEnd == next_spBegin) continue;//no spacepoints in this layer + + const TrigAccel::SILICON_LAYER& layerGeo = dDetModel->m_layers[nextLayerIdx]; + bool isBarrel = (layerGeo.m_type == 0); + float refCoord = layerGeo.m_refCoord; + + if(isBarrel && fabs(refCoord-rm)>maxDoubletLength) continue; + + //boundaries for nextLayer + + float minCoord = 10000.0; + float maxCoord =-10000.0; + + if(isBarrel) { + minCoord = zMinus + refCoord*(zm-zMinus)/rm; + maxCoord = zPlus + refCoord*(zm-zPlus)/rm; + } + else { + minCoord = rm*(refCoord-zMinus)/(zm-zMinus); + maxCoord = rm*(refCoord-zPlus)/(zm-zPlus); + } + + if(minCoord>maxCoord) { + float tmp = maxCoord;maxCoord = minCoord;minCoord = tmp; + } + + if(layerGeo.m_maxBound<minCoord || layerGeo.m_minBound>maxCoord) continue; + + //3. get a tile of inner/outer spacepoints + + for(int spIdx=threadIdx.y+next_spBegin;spIdx<next_spEnd;spIdx+=blockDim.y) { + + float zsp = dSpacepoints->m_z[spIdx]; + float rsp = dSpacepoints->m_r[spIdx]; + + float spCoord = (isBarrel) ? zsp : rsp; + + if(spCoord<minCoord || spCoord>maxCoord) continue; + + bool isPixel2 = (dSpacepoints->m_type[spIdx] == 1); + float dr = rsp - rm; + + if(fabs(dr)>maxDoubletLength || fabs(dr)<minDoubletLength) continue; + + if(!DoPSS && dr<0 && !isPixel && isPixel2) continue; + + float dz = zsp - zm; + float t = dz/dr; + + if(fabs(t)>maxCtg) continue; + //if(dr > 0) {//outer doublet + float outZ = zsp + (maxOuterRadius-rsp)*t; + if(outZ < minOuterZ || outZ > maxOuterZ) continue; + //} + if(dr > 0) + atomicAdd(&nOuter[threadIdx.x],1); + else + atomicAdd(&nInner[threadIdx.x],1); + } + } + } + + __syncthreads(); + + if(threadIdx.y == 0) { + d_Info->m_nInner[spmIdx] = nInner[threadIdx.x]; + d_Info->m_nOuter[spmIdx] = nOuter[threadIdx.x]; + d_Info->m_good[spmIdx] = (nInner[threadIdx.x] > 0 && nOuter[threadIdx.x] > 0) ? 1 : 0; + } + __syncthreads(); + } + __syncthreads(); + +} + +#endif diff --git a/Trigger/TrigAccel/TrigInDetCUDA/src/DoubletMakingKernelCuda.cuh b/Trigger/TrigAccel/TrigInDetCUDA/src/DoubletMakingKernelCuda.cuh new file mode 100644 index 0000000000000000000000000000000000000000..2a5d13614c8caf11771edc3a7305fc9ffcc8d996 --- /dev/null +++ b/Trigger/TrigAccel/TrigInDetCUDA/src/DoubletMakingKernelCuda.cuh @@ -0,0 +1,177 @@ +/* + Copyright (C) 2002-2020 CERN for the benefit of the ATLAS collaboration +*/ + +#ifndef TRIGINDETCUDA_DOUBLETMAKINGKERNELCUDA_CUH +#define TRIGINDETCUDA_DOUBLETMAKINGKERNELCUDA_CUH + +#include <cuda_runtime.h> +#include "SeedMakingDataStructures.h" + +__global__ static void doubletMakingKernel(TrigAccel::SEED_FINDER_SETTINGS* dSettings, + TrigAccel::SPACEPOINT_STORAGE* dSpacepoints, + TrigAccel::DETECTOR_MODEL* dDetModel, + TrigAccel::OUTPUT_SEED_STORAGE* d_Out, + DOUBLET_INFO* d_Info, + DOUBLET_STORAGE* d_Storage, + int nLayers, int nSlices) { + + __shared__ int spBegin; + __shared__ int spEnd; + __shared__ int nMiddleSPs; + + __shared__ bool hasDoublets; + + __shared__ int outerPos[NUM_MIDDLE_THREADS]; + __shared__ int innerPos[NUM_MIDDLE_THREADS]; + + const float zTolerance = 3.0; + const float maxEta = 2.7; + const float maxDoubletLength = 200.0; + const float minDoubletLength = 10.0; + const float maxOuterRadius = 550.0; + + const int sliceIdx = blockIdx.x; + const int layerIdx = blockIdx.y; + + if(threadIdx.x == 0 && threadIdx.y == 0) { + const TrigAccel::SPACEPOINT_LAYER_RANGE& slr = dSpacepoints->m_phiSlices[sliceIdx]; + spBegin = slr.m_layerBegin[layerIdx]; + spEnd = slr.m_layerEnd[layerIdx]; + nMiddleSPs = spEnd-spBegin; + } + __syncthreads(); + + if(nMiddleSPs == 0) return; + + const float zMinus = dSettings->m_zedMinus - zTolerance; + const float zPlus = dSettings->m_zedPlus + zTolerance; + const float maxTheta = 2*atan(exp(-maxEta)); + const float maxCtg = cos(maxTheta)/sin(maxTheta); + const bool DoPSS = dSettings->m_tripletDoPSS; + const float minOuterZ = dSettings->m_zedMinus - maxOuterRadius*maxCtg - zTolerance; + const float maxOuterZ = dSettings->m_zedPlus + maxOuterRadius*maxCtg + zTolerance; + + //1. get a tile of middle spacepoints + + __syncthreads(); + + for(int spmIdx=threadIdx.x+spBegin;spmIdx<spEnd;spmIdx+=blockDim.x) { + + if(threadIdx.y ==0) { + hasDoublets = d_Info->m_good[spmIdx] == 1; + } + __syncthreads(); + + if(!hasDoublets) continue; + + if(threadIdx.y ==0) {//loading pre-calculated numbers of doublets ... + + int nInner = d_Info->m_nInner[spmIdx]; + int nOuter = d_Info->m_nOuter[spmIdx]; + + outerPos[threadIdx.x] = atomicAdd(&d_Storage->m_nO, nOuter); + innerPos[threadIdx.x] = atomicAdd(&d_Storage->m_nI, nInner); + int k = atomicAdd(&d_Storage->m_nItems, 1); + d_Storage->m_spmIdx[k] = spmIdx; + d_Storage->m_innerStart[k] = innerPos[threadIdx.x]; + d_Storage->m_outerStart[k] = outerPos[threadIdx.x]; + } + + __syncthreads(); + + int spmType = dSpacepoints->m_type[spmIdx]; + float zm = dSpacepoints->m_z[spmIdx]; + float rm = dSpacepoints->m_r[spmIdx]; + bool isPixel = (spmType == 1); + + //2. loop over other phi-bins / layers + + for(int deltaPhiIdx=-1;deltaPhiIdx<=1;deltaPhiIdx++) { + int nextPhiIdx = sliceIdx + deltaPhiIdx; + if(nextPhiIdx>=nSlices) nextPhiIdx = 0; + if(nextPhiIdx<0) nextPhiIdx=nSlices-1; + const TrigAccel::SPACEPOINT_LAYER_RANGE& next_slr = dSpacepoints->m_phiSlices[nextPhiIdx]; + + for(int nextLayerIdx=0;nextLayerIdx<nLayers;nextLayerIdx++) { + + if(nextLayerIdx == layerIdx) continue; + + int next_spBegin = next_slr.m_layerBegin[nextLayerIdx]; + int next_spEnd = next_slr.m_layerEnd[nextLayerIdx]; + + if(next_spEnd == next_spBegin) continue;//no spacepoints in this layer + + const TrigAccel::SILICON_LAYER& layerGeo = dDetModel->m_layers[nextLayerIdx]; + bool isBarrel = (layerGeo.m_type == 0); + float refCoord = layerGeo.m_refCoord; + + if(isBarrel && fabs(refCoord-rm)>maxDoubletLength) continue; + + //boundaries for nextLayer + + float minCoord = 10000.0; + float maxCoord =-10000.0; + + if(isBarrel) { + minCoord = zMinus + refCoord*(zm-zMinus)/rm; + maxCoord = zPlus + refCoord*(zm-zPlus)/rm; + } + else { + minCoord = rm*(refCoord-zMinus)/(zm-zMinus); + maxCoord = rm*(refCoord-zPlus)/(zm-zPlus); + } + + if(minCoord>maxCoord) { + float tmp = maxCoord;maxCoord = minCoord;minCoord = tmp; + } + + if(layerGeo.m_maxBound<minCoord || layerGeo.m_minBound>maxCoord) continue; + + //3. get a tile of inner/outer spacepoints + + for(int spIdx=threadIdx.y+next_spBegin;spIdx<next_spEnd;spIdx+=blockDim.y) { + + float zsp = dSpacepoints->m_z[spIdx]; + float rsp = dSpacepoints->m_r[spIdx]; + + float spCoord = (isBarrel) ? zsp : rsp; + + if(spCoord<minCoord || spCoord>maxCoord) continue; + + bool isPixel2 = (dSpacepoints->m_type[spIdx] == 1); + float dr = rsp - rm; + + if(fabs(dr)>maxDoubletLength || fabs(dr)<minDoubletLength) continue; + + if(!DoPSS && dr<0 && !isPixel && isPixel2) continue; + + float dz = zsp - zm; + float t = dz/dr; + + if(fabs(t)>maxCtg) continue; + //if(dr > 0) {//outer doublet + float outZ = zsp + (maxOuterRadius-rsp)*t; + if(outZ < minOuterZ || outZ > maxOuterZ) continue; + //} + if(dr > 0) { + int k = atomicAdd(&outerPos[threadIdx.x],1); + d_Storage->m_outer[k] = spIdx; + } + else { + int k = atomicAdd(&innerPos[threadIdx.x],1); + d_Storage->m_inner[k] = spIdx; + } + } + } + } + } + __syncthreads(); + + if(threadIdx.x == 0 && threadIdx.y == 0) { + atomicAdd(&d_Out->m_nMiddleSps, nMiddleSPs); + } + +} + +#endif \ No newline at end of file diff --git a/Trigger/TrigAccel/TrigInDetCUDA/src/DoubletMatchingKernelCuda.cuh b/Trigger/TrigAccel/TrigInDetCUDA/src/DoubletMatchingKernelCuda.cuh new file mode 100644 index 0000000000000000000000000000000000000000..4a041c4a5674007a482bec9ec579f46b29f994ae --- /dev/null +++ b/Trigger/TrigAccel/TrigInDetCUDA/src/DoubletMatchingKernelCuda.cuh @@ -0,0 +1,313 @@ +/* + Copyright (C) 2002-2020 CERN for the benefit of the ATLAS collaboration +*/ + +#ifndef TRIGINDETCUDA_DOUBLETMATCHINGKERNELCUDA_CUH +#define TRIGINDETCUDA_DOUBLETMATCHINGKERNELCUDA_CUH + + +#include <cuda_runtime.h> +#include "SeedMakingDataStructures.h" + + +__global__ static void doubletMatchingKernel(TrigAccel::SEED_FINDER_SETTINGS* dSettings, + TrigAccel::SPACEPOINT_STORAGE* dSpacepoints, + TrigAccel::DETECTOR_MODEL* dDetModel, + DOUBLET_INFO* d_Info, + DOUBLET_STORAGE* d_Storage, + TrigAccel::OUTPUT_SEED_STORAGE* d_Out, int maxItem) { + + __shared__ int nInner; + __shared__ int nOuter; + + __shared__ int innerStart; + __shared__ int outerStart; + + __shared__ int spmIdx; + __shared__ float rm; + + __shared__ float covZ; + __shared__ float covR; + + __shared__ float x0; + __shared__ float y0; + __shared__ float z0; + + __shared__ float cosA; + __shared__ float sinA; + + __shared__ bool isPixel; + + // __shared__ float R2inv_array[MAX_NUMBER_DOUBLETS]; + __shared__ float Rinv_array[MAX_NUMBER_DOUBLETS]; + __shared__ float t_array[MAX_NUMBER_DOUBLETS]; + __shared__ int spIdx_array[MAX_NUMBER_DOUBLETS]; + __shared__ float u_array[MAX_NUMBER_DOUBLETS]; + __shared__ float v_array[MAX_NUMBER_DOUBLETS]; + __shared__ bool isSCT_array[MAX_NUMBER_DOUBLETS]; + + // __shared__ float covZ_array[MAX_NUMBER_DOUBLETS]; + // __shared__ float covR_array[MAX_NUMBER_DOUBLETS]; + + __shared__ float tCov_array[MAX_NUMBER_DOUBLETS]; + + + __shared__ int PairIdx_array[MAX_TRIPLETS]; + __shared__ float Q_array[MAX_TRIPLETS]; + __shared__ int sortedIdx[MAX_TRIPLETS]; + + + __shared__ int iDoublet; + __shared__ int startOfOuter; + __shared__ int nPairs; + __shared__ int nTriplets; + + const double dtCut = 0.25; + const float radLen = 0.036; + const float dp = 13.6/dSettings->m_tripletPtMin; + const float CovMS = dp*dp*radLen; + + + const float ptCoeff = 0.29997*dSettings->m_magFieldZ/2;// ~0.3 + const float minPt2 = dSettings->m_tripletPtMin*dSettings->m_tripletPtMin; + const float ptCoeff2 = ptCoeff*ptCoeff; + const float maxD0 = dSettings->m_tripletD0Max; + const float maxD0_PPS = dSettings->m_tripletD0_PPS_Max; + + + for(int itemIdx = blockIdx.x;itemIdx<maxItem;itemIdx += gridDim.x) { + + if(threadIdx.x==0) { + + nTriplets = 0; + iDoublet=0; + + spmIdx = d_Storage->m_spmIdx[itemIdx]; + nInner = d_Info->m_nInner[spmIdx]; + nOuter = d_Info->m_nOuter[spmIdx]; + + nPairs = nInner*nOuter; + + atomicAdd(&d_Out->m_nO,nOuter); + atomicAdd(&d_Out->m_nI,nInner); + + innerStart = d_Storage->m_innerStart[itemIdx]; + outerStart = d_Storage->m_outerStart[itemIdx]; + + int spmType = dSpacepoints->m_type[spmIdx]; + + rm = dSpacepoints->m_r[spmIdx]; + covZ = dSpacepoints->m_covZ[spmIdx]; + covR = dSpacepoints->m_covR[spmIdx]; + + x0 = dSpacepoints->m_x[spmIdx]; + y0 = dSpacepoints->m_y[spmIdx]; + z0 = dSpacepoints->m_z[spmIdx]; + cosA = x0/rm; + sinA = y0/rm; + isPixel = (spmType == 1); + } + __syncthreads(); + + + for(int innerIdx = threadIdx.x; innerIdx<nInner;innerIdx+=blockDim.x) { + + int k = atomicAdd(&iDoublet,1); + + if(k<MAX_NUMBER_DOUBLETS) { + + int spiIdx = d_Storage->m_inner[innerStart + innerIdx]; + + spIdx_array[k] = spiIdx; + + float dx_inn = dSpacepoints->m_x[spiIdx] - x0; + float dy_inn = dSpacepoints->m_y[spiIdx] - y0; + float dz_inn = dSpacepoints->m_z[spiIdx] - z0; + + float R2inv = 1.0/(dx_inn*dx_inn+dy_inn*dy_inn); + Rinv_array[k] = sqrt(R2inv); + t_array[k] = Rinv_array[k]*dz_inn; + + tCov_array[k] = R2inv*(covZ + dSpacepoints->m_covZ[spiIdx] + t_array[k]*t_array[k]*(covR + dSpacepoints->m_covR[spiIdx])); + + float xn_inn = dx_inn*cosA + dy_inn*sinA; + float yn_inn =-dx_inn*sinA + dy_inn*cosA; + + u_array[k] = xn_inn*R2inv; + v_array[k] = yn_inn*R2inv; + + isSCT_array[k] = (dSpacepoints->m_type[spiIdx] == 2); + } + } + + __syncthreads(); + + if(threadIdx.x==0) { + if(iDoublet>MAX_NUMBER_DOUBLETS) iDoublet = MAX_NUMBER_DOUBLETS; + startOfOuter=iDoublet; + } + + __syncthreads(); + + for(int outerIdx = threadIdx.x; outerIdx<nOuter;outerIdx+=blockDim.x) { + + int k = atomicAdd(&iDoublet,1); + + if(k<MAX_NUMBER_DOUBLETS) { + + int spoIdx = d_Storage->m_outer[outerStart + outerIdx]; + + spIdx_array[k] = spoIdx; + + float dx_out = dSpacepoints->m_x[spoIdx] - x0; + float dy_out = dSpacepoints->m_y[spoIdx] - y0; + float dz_out = -dSpacepoints->m_z[spoIdx] + z0; + + float R2inv = 1.0/(dx_out*dx_out+dy_out*dy_out); + Rinv_array[k] = sqrt(R2inv); + t_array[k] = Rinv_array[k]*dz_out; + + tCov_array[k] = R2inv*(covZ + dSpacepoints->m_covZ[spoIdx] + t_array[k]*t_array[k]*(covR + dSpacepoints->m_covR[spoIdx])); + + float xn_out = dx_out*cosA + dy_out*sinA; + float yn_out =-dx_out*sinA + dy_out*cosA; + + u_array[k] = xn_out*R2inv; + v_array[k] = yn_out*R2inv; + + isSCT_array[k] = (dSpacepoints->m_type[spoIdx] == 2); + } + } + + __syncthreads(); + + for(int pairIdx = threadIdx.x;pairIdx<nPairs;pairIdx += blockDim.x) { + + int doublet_i = pairIdx / nOuter; // inner doublet + int doublet_j = startOfOuter + pairIdx % nOuter; //outer doublet + + if(doublet_i >= MAX_NUMBER_DOUBLETS || doublet_j >=MAX_NUMBER_DOUBLETS ) continue; + + //int spiIdx = spIdx_array[doublet_i]; + //int spoIdx = spIdx_array[doublet_j]; + + //retrieve shared data for doublets doublet_i and doublet_j and apply cut(s) + + //0. dt matching + float t_inn = t_array[doublet_i]; + float t_out = t_array[doublet_j]; + float dt = t_inn - t_out; + if(fabs(dt)>dtCut) continue; + + //1. rz matching + + float t_inn2 = t_inn*t_inn; + float tCov_inn = tCov_array[doublet_i]; + float tCov_out = tCov_array[doublet_j]; + + double dCov = CovMS*(1+t_inn2); + + float covdt = tCov_inn + tCov_out; + covdt += 2*Rinv_array[doublet_i]*Rinv_array[doublet_j]*(t_inn*t_out*covR + covZ); + float dt2 = dt*dt*(1/9.0); + if(dt2 > covdt+dCov) continue;//i.e. 3-sigma cut + + //2. pT estimate + + float u_inn = u_array[doublet_i]; + float v_inn = v_array[doublet_i]; + float u_out = u_array[doublet_j]; + float v_out = v_array[doublet_j]; + + float du = u_out - u_inn; + if(du==0.0) continue; + float A = (v_out - v_inn)/du; + float B = v_inn - A*u_inn; + float pT2 = ptCoeff2*(1+A*A)/(B*B); + if(pT2 < minPt2) continue; + + //3. the 3-sigma cut with estimated pT + + float frac = minPt2/pT2; + if(dt2 > covdt+frac*dCov) continue; + + //4. d0 cut + + float d0 = rm*(B*rm-A); + float fd0 = fabs(d0); + + if(fd0 > maxD0) continue; + + bool isSCT_1 = isSCT_array[doublet_i]; + bool isSCT_3 = isSCT_array[doublet_j]; + + if (isSCT_3 && isPixel && fd0 > maxD0_PPS) continue; + + //Calculate Quality + + float Q = d0*d0; + Q += isSCT_3*(1000.0*isSCT_1 + (1-isSCT_1)*10000.0); + + int l = atomicAdd(&nTriplets, 1); + if(l<MAX_TRIPLETS) { + PairIdx_array[l] = pairIdx; + Q_array[l] = Q; + sortedIdx[l] = 0; + } + + } + + __syncthreads(); + + + if(nTriplets>TRIPLET_BUFFER_DEPTH) {//sorting + + if(threadIdx.x == 0){ + if(nTriplets>MAX_TRIPLETS) { + nTriplets = MAX_TRIPLETS; + } + } + + __syncthreads(); + + for(int tIdx=threadIdx.x;tIdx<nTriplets*nTriplets;tIdx+=blockDim.x) { + int i = tIdx/nTriplets; + int j = tIdx%nTriplets; + int d = (Q_array[i] > Q_array[j]) + (Q_array[i] == Q_array[j])*(j<i); + atomicAdd(&sortedIdx[i],d); + } + } + + __syncthreads(); + + if(threadIdx.x == 0) { + if(nTriplets>0) { + int nT = nTriplets; + if(nT>TRIPLET_BUFFER_DEPTH) {nT = TRIPLET_BUFFER_DEPTH;} + int k = atomicAdd(&d_Out->m_nSeeds, nT); + int nStored=0; + for(int tIdx=0;tIdx<nTriplets;tIdx++) { + if(sortedIdx[tIdx]<TRIPLET_BUFFER_DEPTH) {//store this triplet + + int pairIdx = PairIdx_array[tIdx]; + int doublet_i = pairIdx / nOuter; // inner doublet + int doublet_j = startOfOuter + pairIdx % nOuter; //outer doublet + int spiIdx = spIdx_array[doublet_i]; + int spoIdx = spIdx_array[doublet_j]; + d_Out->m_innerIndex[k+nStored] = dSpacepoints->m_index[spiIdx]; + d_Out->m_middleIndex[k+nStored] = dSpacepoints->m_index[spmIdx]; + d_Out->m_outerIndex[k+nStored] = dSpacepoints->m_index[spoIdx]; + d_Out->m_Q[k+nStored] = Q_array[tIdx]; + nStored++; + } + } + if(nStored!=nT) { + atomicAdd(&d_Out->m_nErrors,1); + } + } + } + __syncthreads(); + } +} + +#endif diff --git a/Trigger/TrigAccel/TrigInDetCUDA/src/SeedMakingDataStructures.h b/Trigger/TrigAccel/TrigInDetCUDA/src/SeedMakingDataStructures.h new file mode 100644 index 0000000000000000000000000000000000000000..5d116496cbd01cb4b7726e73e90aeadfdd23cfbb --- /dev/null +++ b/Trigger/TrigAccel/TrigInDetCUDA/src/SeedMakingDataStructures.h @@ -0,0 +1,45 @@ +/* + Copyright (C) 2002-2020 CERN for the benefit of the ATLAS collaboration +*/ + +#ifndef TRIGINDETCUDA_SEEDMAKINGDATASTRUCTURES_H +#define TRIGINDETCUDA_SEEDMAKINGDATASTRUCTURES_H + +constexpr unsigned int MAX_MIDDLE_SP = 100000; +constexpr unsigned int MAX_DOUBLET = 10000000; +constexpr unsigned int NUM_MIDDLE_THREADS = 32; +constexpr unsigned int OUTER_THREADS_MULTIPLIER = 4; // i.e thread block is 32 x 4*192/32 +constexpr unsigned int MAX_NUMBER_DOUBLETS = 1500; +constexpr unsigned int NUM_TRIPLET_BLOCKS = 1024; +constexpr unsigned int NUM_TRIPLET_THREADS = 1024; +constexpr unsigned int NUM_DOUBLET_THREADS = 16; +constexpr unsigned int MAX_TRIPLETS = 300; +constexpr unsigned int TRIPLET_BUFFER_DEPTH = 3; + +typedef struct doubletInfo { +public: + int m_nInner[MAX_MIDDLE_SP]; + int m_nOuter[MAX_MIDDLE_SP]; + char m_good[MAX_MIDDLE_SP]; +} DOUBLET_INFO; + +typedef struct doubletStorage { +public: + int m_nItems; + int m_nI; + int m_nO; + int m_spmIdx[MAX_MIDDLE_SP]; + int m_innerStart[MAX_MIDDLE_SP]; + int m_outerStart[MAX_MIDDLE_SP]; + int m_inner[MAX_DOUBLET]; + int m_outer[MAX_DOUBLET]; +} DOUBLET_STORAGE; + +typedef struct gpuParameters { + int m_nSMX; + int m_nNUM_SMX_CORES; + int m_nNUM_TRIPLET_BLOCKS; +} GPU_PARAMETERS; + + +#endif diff --git a/Trigger/TrigAccel/TrigInDetCUDA/src/SeedMakingWorkCuda.cu b/Trigger/TrigAccel/TrigInDetCUDA/src/SeedMakingWorkCuda.cu new file mode 100644 index 0000000000000000000000000000000000000000..4ade289028e32ee95869662977583e6ca257322b --- /dev/null +++ b/Trigger/TrigAccel/TrigInDetCUDA/src/SeedMakingWorkCuda.cu @@ -0,0 +1,150 @@ +/* + Copyright (C) 2002-2020 CERN for the benefit of the ATLAS collaboration +*/ + +#include <cuda.h> +#include <cuda_runtime.h> + +#include "TrigAccelEvent/TrigInDetAccelEDM.h" +#include "SeedMakingWorkCuda.h" + +#include "tbb/tick_count.h" +#include <cstring> +#include <cmath> +#include <iostream> +#include <algorithm> + +#include "DoubletCountingKernelCuda.cuh" +#include "DoubletMakingKernelCuda.cuh" +#include "DoubletMatchingKernelCuda.cuh" + +SeedMakingWorkCuda::SeedMakingWorkCuda(unsigned int id, const SeedMakingWorkContextCuda& ctx, std::shared_ptr<TrigAccel::OffloadBuffer> data, + tbb::concurrent_queue<SeedMakingDeviceContext*>& CQ, tbb::concurrent_vector<WorkTimeStamp>& TL) : + m_workId(id), + m_context(0), + m_input(data), + m_contextQueue(CQ), + m_timeLine(TL) + { + + m_context = new SeedMakingWorkContextCuda(ctx);//make a copy + m_output = std::make_shared<TrigAccel::OffloadBuffer>(sizeof(TrigAccel::OUTPUT_SEED_STORAGE));//output data +} + +SeedMakingWorkCuda::~SeedMakingWorkCuda() { + if(m_context) delete(m_context);//delete the copy +} + +std::shared_ptr<TrigAccel::OffloadBuffer> SeedMakingWorkCuda::getOutput() { + return m_output; +} + +bool SeedMakingWorkCuda::run() { + + m_timeLine.push_back(WorkTimeStamp(m_workId, 0, tbb::tick_count::now())); + + const SeedMakingDeviceContext& p = *(m_context->m_pdc); + + int id = p.m_deviceId; + + TrigAccel::OUTPUT_SEED_STORAGE* ps = reinterpret_cast<TrigAccel::OUTPUT_SEED_STORAGE*>(p.h_outputseeds); + + cudaSetDevice(id); + + checkError(); + + cudaMemcpyAsync(p.d_settings, p.h_settings, sizeof(TrigAccel::SEED_FINDER_SETTINGS), cudaMemcpyHostToDevice, p.m_stream); + + checkError(); + + cudaMemcpyAsync(p.d_spacepoints, p.h_spacepoints, sizeof(TrigAccel::SPACEPOINT_STORAGE), cudaMemcpyHostToDevice, p.m_stream); + + checkError(); + + cudaStreamSynchronize(p.m_stream); + + TrigAccel::SEED_FINDER_SETTINGS* dSettings = reinterpret_cast<TrigAccel::SEED_FINDER_SETTINGS *>(p.d_settings); + TrigAccel::SPACEPOINT_STORAGE* dSpacepoints = reinterpret_cast<TrigAccel::SPACEPOINT_STORAGE *>(p.d_spacepoints); + TrigAccel::DETECTOR_MODEL* dDetModel = reinterpret_cast<TrigAccel::DETECTOR_MODEL*>(p.d_detmodel); + TrigAccel::OUTPUT_SEED_STORAGE* dOutput = reinterpret_cast<TrigAccel::OUTPUT_SEED_STORAGE*>(p.d_outputseeds); + + DOUBLET_INFO* dInfo = reinterpret_cast<DOUBLET_INFO*>(p.d_doubletinfo); + DOUBLET_STORAGE* dStorage = reinterpret_cast<DOUBLET_STORAGE*>(p.d_doubletstorage); + + cudaMemset(p.d_outputseeds,0,10*sizeof(int)); + + checkError(); + + cudaMemset(p.d_doubletstorage,0,3*sizeof(int)); + + checkError(); + + const TrigAccel::SPACEPOINT_STORAGE* pSPS = reinterpret_cast<const TrigAccel::SPACEPOINT_STORAGE *>(p.h_spacepoints); + int nSlices = pSPS->m_nPhiSlices; + int nLayers = pSPS->m_nLayers; + + int nMiddleSp = NUM_MIDDLE_THREADS;//determines size of the doublet/triplet buffers + int nOtherSp = OUTER_THREADS_MULTIPLIER*p.m_gpuParams.m_nNUM_SMX_CORES/NUM_MIDDLE_THREADS;//the size of the spacepoint buffer + + dim3 gridDimensions(nSlices, nLayers); + dim3 blockDimensions(nMiddleSp, nOtherSp); + + cudaMemset(p.d_doubletinfo,0,sizeof(DOUBLET_INFO)); + + checkError(); + + cudaStreamSynchronize(p.m_stream); + + checkError(); + + doubletCountingKernel<<<gridDimensions, blockDimensions, 0, p.m_stream>>>(dSettings, dSpacepoints, dDetModel, dInfo, nLayers, nSlices); + + cudaStreamSynchronize(p.m_stream); + + checkError(); + + doubletMakingKernel<<<gridDimensions, blockDimensions, 0, p.m_stream>>>(dSettings, dSpacepoints, dDetModel, dOutput, + dInfo, dStorage, nLayers, nSlices); + + cudaStreamSynchronize(p.m_stream); + + checkError(); + + int nStats[3]; + + cudaMemcpy(&nStats[0], p.d_doubletstorage, 3*sizeof(int), cudaMemcpyDeviceToHost); + + + doubletMatchingKernel<<<p.m_gpuParams.m_nNUM_TRIPLET_BLOCKS, NUM_TRIPLET_THREADS, 0, p.m_stream>>>(dSettings, dSpacepoints, dDetModel, dInfo, + dStorage, dOutput, nStats[0]); + + cudaStreamSynchronize(p.m_stream); + + checkError(); + + TrigAccel::OUTPUT_SEED_STORAGE* pOutput = reinterpret_cast<TrigAccel::OUTPUT_SEED_STORAGE *>(m_output->m_rawBuffer); + + //Read back GPU results + + pOutput->m_nMiddleSps = 0; + pOutput->m_nSeeds = 0; + pOutput->m_nI = 0; + pOutput->m_nO = 0; + pOutput->m_nCandidates = 0; + + cudaMemcpyAsync(p.h_outputseeds, p.d_outputseeds, sizeof(TrigAccel::OUTPUT_SEED_STORAGE), cudaMemcpyDeviceToHost, p.m_stream); + + checkError(); + + cudaStreamSynchronize(p.m_stream); + + checkError(); + + memcpy(pOutput, ps, sizeof(TrigAccel::OUTPUT_SEED_STORAGE)); + + m_contextQueue.push(m_context->m_pdc); + + m_timeLine.push_back(WorkTimeStamp(m_workId, 1, tbb::tick_count::now())); + + return true; +} diff --git a/Trigger/TrigAccel/TrigInDetCUDA/src/SeedMakingWorkCuda.h b/Trigger/TrigAccel/TrigInDetCUDA/src/SeedMakingWorkCuda.h new file mode 100644 index 0000000000000000000000000000000000000000..cc8a5bb2f0d9774e2a7fed17e17858d6e232f4b8 --- /dev/null +++ b/Trigger/TrigAccel/TrigInDetCUDA/src/SeedMakingWorkCuda.h @@ -0,0 +1,48 @@ +/* + Copyright (C) 2002-2020 CERN for the benefit of the ATLAS collaboration +*/ + +#ifndef TRIGINDETCUDA_SEEDMAKINGWORKCUDA_H +#define TRIGINDETCUDA_SEEDMAKINGWORKCUDA_H + +#include <vector> + +#include "tbb/concurrent_queue.h" +#include "tbb/concurrent_vector.h" +#include "TrigInDetDataContexts.h" +#include "TrigAccelEvent/Work.h" +#include "TrigAccelEvent/WorkFactory.h" + + +class SeedMakingWorkCuda : public TrigAccel::Work{ + +public: + SeedMakingWorkCuda(unsigned int, const SeedMakingWorkContextCuda&, std::shared_ptr<TrigAccel::OffloadBuffer> data, + tbb::concurrent_queue<SeedMakingDeviceContext*>&, tbb::concurrent_vector<WorkTimeStamp>&); + ~SeedMakingWorkCuda(); + std::shared_ptr<TrigAccel::OffloadBuffer> getOutput(); + bool run(); + unsigned int getId() const { + return m_workId; + } + +private: + + inline void checkError() const { + cudaError_t error = cudaGetLastError(); + if(error != cudaSuccess) { + printf("CUDA error: %s\n", cudaGetErrorString(error)); + exit(-1); + } + }; + + unsigned int m_workId; + SeedMakingWorkContextCuda* m_context; + std::shared_ptr<TrigAccel::OffloadBuffer> m_input, m_output; + tbb::concurrent_queue<SeedMakingDeviceContext*>& m_contextQueue; + tbb::concurrent_vector<WorkTimeStamp>& m_timeLine; + + float m_CovMS, m_ptCoeff, m_minPt2, m_ptCoeff2, m_maxD0; +}; + +#endif diff --git a/Trigger/TrigAccel/TrigInDetCUDA/src/TrigInDetDataContexts.h b/Trigger/TrigAccel/TrigInDetCUDA/src/TrigInDetDataContexts.h new file mode 100644 index 0000000000000000000000000000000000000000..8ddf837d4802ca9aecc1dfe2118aa9bbc1644f83 --- /dev/null +++ b/Trigger/TrigAccel/TrigInDetCUDA/src/TrigInDetDataContexts.h @@ -0,0 +1,82 @@ +/* + Copyright (C) 2002-2020 CERN for the benefit of the ATLAS collaboration +*/ + +#ifndef TRIGINDETCUDA_TRIGINDETDATACONTEXTS_H +#define TRIGINDETCUDA_TRIGINDETDATACONTEXTS_H + +#include "TrigAccelEvent/TrigInDetAccelEDM.h" + +#include "SeedMakingDataStructures.h" +#include <cuda_runtime.h> +#include <tbb/tick_count.h> + +class WorkTimeStamp { +public: + WorkTimeStamp(unsigned int id, int ev, const tbb::tick_count& t) : + m_workId(id), m_eventType(ev), m_time(t) {}; + WorkTimeStamp(const WorkTimeStamp& w) : m_workId(w.m_workId), m_eventType(w.m_eventType), m_time(w.m_time) {}; + unsigned int m_workId; + int m_eventType; + tbb::tick_count m_time; +}; + + +class SeedMakingWorkContext {//base class +public: + SeedMakingWorkContext() {}; + SeedMakingWorkContext(const SeedMakingWorkContext& c) {}; +}; + +struct SeedMakingDeviceContext { +public: + SeedMakingDeviceContext() : m_deviceId(-1), h_spacepoints(0), d_spacepoints(0), d_size(0), h_size(0) {}; + size_t hostSize() { return h_size;} + size_t deviceSize() { return d_size;} + + int m_deviceId; + cudaStream_t m_stream; + unsigned char *h_settings; + unsigned char *d_settings; + unsigned char *h_spacepoints; + unsigned char *d_spacepoints; + + unsigned char *d_detmodel; + + unsigned char *h_outputseeds; + unsigned char *d_outputseeds; + + unsigned char *d_doubletstorage; + unsigned char *d_doubletinfo; + + size_t d_size, h_size; + GPU_PARAMETERS m_gpuParams; + +private: + SeedMakingDeviceContext(const SeedMakingDeviceContext& sc) : m_deviceId(sc.m_deviceId) {}; +}; + + + +class SeedMakingWorkContextCuda : public SeedMakingWorkContext { +public: + SeedMakingWorkContextCuda(SeedMakingDeviceContext* pdc, + bool pinm = true, bool wcm = false, bool link = false) : SeedMakingWorkContext(), + m_pdc(pdc), + m_usePinnedMemory(pinm), + m_useWriteCombinedMemory(wcm), + m_linkOutputToShm(link) {}; + + SeedMakingWorkContextCuda(const SeedMakingWorkContextCuda& c) : SeedMakingWorkContext(), + m_pdc(c.m_pdc), + m_usePinnedMemory(c.m_usePinnedMemory), + m_useWriteCombinedMemory(c.m_useWriteCombinedMemory), + m_linkOutputToShm(c.m_linkOutputToShm) {}; + SeedMakingDeviceContext* m_pdc; + bool m_usePinnedMemory; + bool m_useWriteCombinedMemory; + bool m_linkOutputToShm; +}; + + +#endif diff --git a/Trigger/TrigAccel/TrigInDetCUDA/src/TrigInDetModuleCuda.cu b/Trigger/TrigAccel/TrigInDetCUDA/src/TrigInDetModuleCuda.cu new file mode 100644 index 0000000000000000000000000000000000000000..f24815a593a6eb0929bb865a9f1ce63bae1c7403 --- /dev/null +++ b/Trigger/TrigAccel/TrigInDetCUDA/src/TrigInDetModuleCuda.cu @@ -0,0 +1,302 @@ +/* + Copyright (C) 2002-2020 CERN for the benefit of the ATLAS collaboration +*/ + +#include <cuda.h> +#include <cuda_runtime.h> +#include <atomic> + +#include "TrigInDetModuleCuda.h" +#include "SeedMakingDataStructures.h" +#include "SeedMakingWorkCuda.h" + +#include "TrigAccelEvent/TrigInDetAccelCodes.h" + +extern "C" TrigAccel::WorkFactory* getFactory() { + return new TrigInDetModuleCuda(); +} + +extern "C" int getFactoryId() { + return TrigAccel::TrigInDetModuleID_CUDA; +} + +extern "C" void deleteFactory(TrigAccel::WorkFactory* c){ + TrigInDetModuleCuda* mod=reinterpret_cast<TrigInDetModuleCuda*>(c); + delete mod; +} + +TrigInDetModuleCuda::TrigInDetModuleCuda() : m_maxNumberOfContexts(12), m_maxDevice(0), m_usePinnedMemory(true), + m_useWriteCombinedMemory(false), m_linkOutputToShm(false), m_dumpTimeLine(false) { + + m_d_detmodels.clear(); + + cudaGetDeviceCount(&m_maxDevice); + + cudaError_t error = cudaGetLastError(); + + if(error != cudaSuccess) { + m_maxDevice = 0; + } + + for(unsigned int i=0;i<getProvidedAlgs().size();i++) { + m_workItemCounters[i] = 0; + } + + m_timeLine.clear(); + +} + +TrigInDetModuleCuda::~TrigInDetModuleCuda() { + + SeedMakingDeviceContext* ps = 0; + + std::cout<<"deleting "<<m_seedMakingDcQueue.unsafe_size()<<" device contexts"<<std::endl; + + while(m_seedMakingDcQueue.try_pop(ps)) deleteSeedMakingContext(ps); + + for(auto dm : m_d_detmodels) { + cudaSetDevice(dm.first); + cudaFree(dm.second); + + } + m_d_detmodels.clear(); + + if(m_dumpTimeLine) { + + std::cout<<"time_line has "<<m_timeLine.size()<<" events"<<std::endl; + if(m_timeLine.size() > 0) { + tbb::tick_count t0 = m_timeLine[0].m_time; + std::ofstream tl("timeLine.csv"); + tl<<"workId,eventType,time"<<std::endl; + tl<<m_timeLine[0].m_workId<<","<<m_timeLine[0].m_eventType<<",0"<<std::endl; + for(unsigned int tIdx = 1;tIdx < m_timeLine.size();++tIdx) { + tbb::tick_count t1 = m_timeLine[tIdx].m_time; + auto duration = t1-t0; + tl<<m_timeLine[tIdx].m_workId<<","<<m_timeLine[tIdx].m_eventType<<","<<1000*duration.seconds()<<std::endl; + } + tl.close(); + } + } +} + +bool TrigInDetModuleCuda::configure() { + + std::vector<int> allowedGPUs, nDeviceContexts; + + allowedGPUs.resize(1,0);//configured for just 1 device with deviceId = 0 + + nDeviceContexts.resize(1,8);//configured for 8 DataContexts + + if(m_maxDevice == 0) { + std::cout<<"No CUDA devices found"<<std::endl; + return false; + } + + if(allowedGPUs.empty() || nDeviceContexts.empty()) return false; + + if(allowedGPUs.size() != nDeviceContexts.size()) return false; + + unsigned int dcIndex=0; + + size_t memTotalSize = 0; + + std::vector< SeedMakingDeviceContext*> vSeedDCs[100];//we do not have that many GPUs + + int nDCTotal=0; + + for(std::vector<int>::iterator devIt = allowedGPUs.begin(); devIt!= allowedGPUs.end();++devIt, dcIndex++) { + + int deviceId = (*devIt); + + if(deviceId<0 || deviceId>=m_maxDevice) continue; + + size_t memTotalSizeOnDevice = 0; + + cudaSetDevice(deviceId); + + checkError(); + + unsigned char* d_detmodel; + + cudaMalloc((void **)&d_detmodel, sizeof(TrigAccel::DETECTOR_MODEL)); + + checkError(); + + m_d_detmodels.insert(std::pair<unsigned int, unsigned char*>(deviceId, d_detmodel)); + + int nDC = nDeviceContexts[dcIndex]; + nDCTotal += nDC; + + memTotalSizeOnDevice += sizeof(TrigAccel::DETECTOR_MODEL); + + for(int dc=0;dc<nDC;dc++) { + SeedMakingDeviceContext* p = createSeedMakingContext(deviceId); + memTotalSizeOnDevice += p->deviceSize(); + vSeedDCs[dcIndex].push_back(p); + } + + memTotalSize += memTotalSizeOnDevice; + + std::cout<<"GPU"<<deviceId<<" allocated data context size = "<<1e-6*memTotalSizeOnDevice<<" MBytes"<<std::endl; + } + + int nDCLeft = nDCTotal; + while(nDCLeft>0) { + for(unsigned int iGPU=0;iGPU<allowedGPUs.size();iGPU++) { + if(vSeedDCs[iGPU].empty()) continue; + m_seedMakingDcQueue.push(vSeedDCs[iGPU].back()); + vSeedDCs[iGPU].pop_back(); + --nDCLeft; + } + } + + std::cout<<"Data context queue : "; + for(tbb::concurrent_queue< SeedMakingDeviceContext*>::const_iterator i(m_seedMakingDcQueue.unsafe_begin()); i!=m_seedMakingDcQueue.unsafe_end(); ++i ) { + std::cout<<(*i)->m_deviceId<<" "; + } + std::cout<<std::endl; + + std::cout<<"Total size of memory allocated on all GPUs = "<<1e-6*memTotalSize<<" MBytes"<<std::endl; + + return true; +} + + +SeedMakingDeviceContext* TrigInDetModuleCuda::createSeedMakingContext(int id) { + + cudaSetDevice(id); + + SeedMakingDeviceContext* p = new SeedMakingDeviceContext; + + p->m_deviceId = id; + + //set stream + + cudaStreamCreate(&p->m_stream); + + //check device property + + cudaDeviceProp deviceProp; + cudaGetDeviceProperties(&deviceProp, id); + + p->m_gpuParams.m_nSMX = deviceProp.multiProcessorCount; + + int ncores = 0; + + if ((deviceProp.minor == 1) || (deviceProp.minor == 2)) ncores = 128; + else if (deviceProp.minor == 0) ncores = 64; + else printf("Cannot determine the number of cores: unknown device type\n"); + + p->m_gpuParams.m_nNUM_SMX_CORES = ncores;//_ConvertSMVer2Cores_local(deviceProp.major, deviceProp.minor); + p->m_gpuParams.m_nNUM_TRIPLET_BLOCKS = NUM_TRIPLET_BLOCKS; + if(deviceProp.maxThreadsPerBlock < p->m_gpuParams.m_nNUM_TRIPLET_BLOCKS) + p->m_gpuParams.m_nNUM_TRIPLET_BLOCKS = deviceProp.maxThreadsPerBlock; + + //Allocate memory + + cudaMalloc((void **)&p->d_settings, sizeof(TrigAccel::SEED_FINDER_SETTINGS)); + cudaMalloc((void **)&p->d_spacepoints, sizeof(TrigAccel::SPACEPOINT_STORAGE)); + + auto dmIt = m_d_detmodels.find(p->m_deviceId); + if(dmIt!=m_d_detmodels.end()) { + p->d_detmodel = (*dmIt).second; + } + + cudaMalloc((void **)&p->d_outputseeds, sizeof(TrigAccel::OUTPUT_SEED_STORAGE)); + cudaMalloc((void **)&p->d_doubletstorage, sizeof(DOUBLET_STORAGE)); + cudaMalloc((void **)&p->d_doubletinfo, sizeof(DOUBLET_INFO)); + + + p->d_size = sizeof(TrigAccel::SEED_FINDER_SETTINGS) + + sizeof(TrigAccel::SPACEPOINT_STORAGE) + sizeof(TrigAccel::OUTPUT_SEED_STORAGE) + sizeof(DOUBLET_STORAGE) + sizeof(DOUBLET_INFO); + + cudaMallocHost((void **)&p->h_settings, sizeof(TrigAccel::SEED_FINDER_SETTINGS)); + cudaMallocHost((void **)&p->h_spacepoints, sizeof(TrigAccel::SPACEPOINT_STORAGE)); + cudaMallocHost((void **)&p->h_outputseeds, sizeof(TrigAccel::OUTPUT_SEED_STORAGE)); + + p->h_size = sizeof(TrigAccel::SEED_FINDER_SETTINGS) + sizeof(TrigAccel::SPACEPOINT_STORAGE) + sizeof(TrigAccel::OUTPUT_SEED_STORAGE); + + return p; +} + +void TrigInDetModuleCuda::deleteSeedMakingContext(SeedMakingDeviceContext* p) { + + int id = p->m_deviceId; + + cudaSetDevice(id); + + cudaStreamDestroy(p->m_stream); + + cudaFree(p->d_settings); + cudaFree(p->d_spacepoints); + + cudaFree(p->d_outputseeds); + cudaFree(p->d_doubletstorage); + cudaFree(p->d_doubletinfo); + + cudaFreeHost(p->h_settings); + cudaFreeHost(p->h_spacepoints); + cudaFreeHost(p->h_outputseeds); + + delete p; + +} + + +TrigAccel::Work* TrigInDetModuleCuda::createWork(int workType, std::shared_ptr<TrigAccel::OffloadBuffer> data){ + + if(workType == TrigAccel::InDetJobControlCode::SIL_LAYERS_EXPORT){ + + for(auto dm : m_d_detmodels) { + + unsigned int deviceId = dm.first; + + cudaSetDevice(deviceId); + + cudaMemcpy(dm.second, (unsigned char*)data->get(), sizeof(TrigAccel::DETECTOR_MODEL), cudaMemcpyHostToDevice); + } + return 0; + } + + if(workType == TrigAccel::InDetJobControlCode::MAKE_SEEDS){ + + SeedMakingDeviceContext* ctx = 0; + + while(!m_seedMakingDcQueue.try_pop(ctx)) { + // std::cout<<"waiting for free device context..."<<std::endl; + }; + + TrigAccel::SEED_MAKING_JOB *pArray = reinterpret_cast<TrigAccel::SEED_MAKING_JOB*>(data->get()); + + //1. copy settings to the context host array + + TrigAccel::SEED_FINDER_SETTINGS* p_settings = reinterpret_cast<TrigAccel::SEED_FINDER_SETTINGS*>(ctx->h_settings); + memcpy(p_settings, &pArray->m_settings, sizeof(TrigAccel::SEED_FINDER_SETTINGS)); + + //2. copy spacepoints to the context host array + + TrigAccel::SPACEPOINT_STORAGE* p_spacePoints = reinterpret_cast<TrigAccel::SPACEPOINT_STORAGE*>(ctx->h_spacepoints); + memcpy(p_spacePoints, &pArray->m_data, sizeof(TrigAccel::SPACEPOINT_STORAGE)); + + unsigned int workNum = m_workItemCounters[0]++;//seed making uses counter #0 + + unsigned int workId = workNum*100; + + SeedMakingWorkCuda* w = new SeedMakingWorkCuda(workId, SeedMakingWorkContextCuda(ctx, m_usePinnedMemory, + m_useWriteCombinedMemory, + m_linkOutputToShm), data, m_seedMakingDcQueue, m_timeLine); + + return w; + } + + return 0; +} + +const std::vector<int> TrigInDetModuleCuda::getProvidedAlgs(){ + std::vector<int> v{ + TrigAccel::InDetJobControlCode::SIL_LAYERS_EXPORT, + TrigAccel::InDetJobControlCode::MAKE_SEEDS + }; + return v; +} + diff --git a/Trigger/TrigAccel/TrigInDetCUDA/src/TrigInDetModuleCuda.h b/Trigger/TrigAccel/TrigInDetCUDA/src/TrigInDetModuleCuda.h new file mode 100644 index 0000000000000000000000000000000000000000..44559910072535e13ce790ea925c774236f39f67 --- /dev/null +++ b/Trigger/TrigAccel/TrigInDetCUDA/src/TrigInDetModuleCuda.h @@ -0,0 +1,75 @@ +/* + Copyright (C) 2002-2020 CERN for the benefit of the ATLAS collaboration +*/ + +#ifndef TRIGINDETCUDA_TRIGINDETMODULECUDA_H +#define TRIGINDETCUDA_TRIGINDETMODULECUDA_H + +#include <map> +#include <atomic> +#include "TrigAccelEvent/WorkFactory.h" +#include "TrigAccelEvent/TrigInDetAccelEDM.h" +#include "TrigAccelEvent/TrigInDetAccelCodes.h" + +#include "TrigInDetDataContexts.h" + +#include "tbb/concurrent_queue.h" +#include "tbb/concurrent_vector.h" + +class TrigInDetModuleCuda : public TrigAccel::WorkFactory { + + public: + + TrigInDetModuleCuda(); + + ~TrigInDetModuleCuda(); + + bool configure(); + + TrigAccel::Work* createWork(int, std::shared_ptr<TrigAccel::OffloadBuffer>); + + const std::vector<int> getProvidedAlgs(); + + virtual int getFactoryId() { + return TrigAccel::TrigInDetModuleID_CUDA; + } + + private: + + inline void checkError() const { + cudaError_t error = cudaGetLastError(); + if(error != cudaSuccess) { + printf("CUDA error: %s\n", cudaGetErrorString(error)); + exit(-1); + } + }; + + //data structures + + //1. "const" data: managed by the Factory + + std::map<unsigned int, unsigned char*> m_d_detmodels; + + int m_maxNumberOfContexts;//Factory setting + int m_maxDevice; + + bool m_usePinnedMemory; + bool m_useWriteCombinedMemory; + bool m_linkOutputToShm; + + // host / device pairs for each device + + tbb::concurrent_queue<SeedMakingDeviceContext*> m_seedMakingDcQueue; + + // data context allocation / de-allocation + + SeedMakingDeviceContext* createSeedMakingContext(int); + void deleteSeedMakingContext(SeedMakingDeviceContext*); + + bool m_dumpTimeLine; + + std::atomic<unsigned int> m_workItemCounters[100];//atomic counters for unique Work identification + tbb::concurrent_vector<WorkTimeStamp> m_timeLine; + }; + +#endif diff --git a/Trigger/TrigAlgorithms/TrigFastTrackFinder/CMakeLists.txt b/Trigger/TrigAlgorithms/TrigFastTrackFinder/CMakeLists.txt index 092618f1f8ec802b58c3147420e7e184e83a6da5..5505cd63d4322994a546536c42474406ecf4158f 100644 --- a/Trigger/TrigAlgorithms/TrigFastTrackFinder/CMakeLists.txt +++ b/Trigger/TrigAlgorithms/TrigFastTrackFinder/CMakeLists.txt @@ -35,7 +35,10 @@ atlas_depends_on_subdirs( PUBLIC Tracking/TrkEvent/TrkEventUtils Trigger/TrigEvent/TrigInDetPattRecoEvent Trigger/TrigTools/TrigInDetToolInterfaces - Trigger/TrigTools/TrigTimeAlgs ) + Trigger/TrigTools/TrigTimeAlgs + Trigger/TrigAccel/TrigAccelEvent + Trigger/TrigAccel/TrigInDetAccel/TrigInDetAccelerationTool + Trigger/TrigAccel/TrigInDetAccel/TrigInDetAccelerationService) # External dependencies: find_package( TBB ) diff --git a/Trigger/TrigAlgorithms/TrigFastTrackFinder/python/TrigFastTrackFinder_Config.py b/Trigger/TrigAlgorithms/TrigFastTrackFinder/python/TrigFastTrackFinder_Config.py index 347af0cea78ae12ec2fb9d0c9c2474d4be23e4c7..d43b491fe07df36dc427e46518d1ae328019e51d 100755 --- a/Trigger/TrigAlgorithms/TrigFastTrackFinder/python/TrigFastTrackFinder_Config.py +++ b/Trigger/TrigAlgorithms/TrigFastTrackFinder/python/TrigFastTrackFinder_Config.py @@ -185,6 +185,24 @@ class TrigFastTrackFinderBase(TrigFastTrackFinder): self.LayerNumberTool = numberingTool from InDetTrigRecExample.InDetTrigSliceSettings import InDetTrigSliceSettings + + # GPU offloading config begins + + #GPU_key = ('useGPU', remapped_type) + #self.useGPU = InDetTrigSliceSettings[GPU_key] + + self.useGPU = False + + #if type == "FS" : self.useGPU = True + + if self.useGPU : + from TrigInDetAccelerationTool.TrigInDetAccelerationToolConf import TrigInDetAccelerationTool + accelTool = TrigInDetAccelerationTool(name = "TrigInDetAccelerationTool_FTF") + ToolSvc += accelTool + + + # GPU offloading config ends + self.doResMon = InDetTrigSliceSettings[('doResMon',remapped_type)] # switch between Run-2/3 monitoring diff --git a/Trigger/TrigAlgorithms/TrigFastTrackFinder/src/TrigFastTrackFinder.cxx b/Trigger/TrigAlgorithms/TrigFastTrackFinder/src/TrigFastTrackFinder.cxx index d7fb3422950d514c78c2f4c61a11d29916f211c8..38c0c5cd2b07b8082c4a54f733d0e24a217b979d 100644 --- a/Trigger/TrigAlgorithms/TrigFastTrackFinder/src/TrigFastTrackFinder.cxx +++ b/Trigger/TrigAlgorithms/TrigFastTrackFinder/src/TrigFastTrackFinder.cxx @@ -61,6 +61,12 @@ #include "TrigInDetToolInterfaces/ITrigZFinder.h" #include "SiSpacePointsSeed/SiSpacePointsSeed.h" + +//for GPU acceleration + +#include "TrigInDetAccelerationTool/ITrigInDetAccelerationTool.h" +#include "TrigInDetAccelerationService/ITrigInDetAccelerationSvc.h" + #include "TrigFastTrackFinder.h" #include "AthenaBaseComps/AthMsgStreamMacros.h" #include "CxxUtils/phihelper.h" @@ -70,6 +76,11 @@ #include "AthenaMonitoringKernel/Monitored.h" #include "GaudiKernel/ThreadLocalContext.h" +//for GPU acceleration + +#include "TrigAccelEvent/TrigInDetAccelEDM.h" +#include "TrigAccelEvent/TrigInDetAccelCodes.h" + TrigFastTrackFinder::TrigFastTrackFinder(const std::string& name, ISvcLocator* pSvcLocator) : HLT::FexAlgo(name, pSvcLocator), @@ -80,6 +91,8 @@ TrigFastTrackFinder::TrigFastTrackFinder(const std::string& name, ISvcLocator* p m_trigInDetTrackFitter("TrigInDetTrackFitter"), m_trigZFinder("TrigZFinder/TrigZFinder", this ), m_trackSummaryTool("Trk::ITrackSummaryTool/ITrackSummaryTool"), + m_accelTool("TrigInDetAccelerationTool"), + m_accelSvc("TrigInDetAccelerationSvc", name), m_doCloneRemoval(true), m_useBeamSpot(true), m_doZFinder(false), @@ -95,7 +108,8 @@ TrigFastTrackFinder::TrigFastTrackFinder(const std::string& name, ISvcLocator* p m_sctId(0), m_idHelper(0), m_particleHypothesis(Trk::pion), - m_useNewLayerNumberScheme(false) + m_useNewLayerNumberScheme(false), + m_useGPU(false) { /** Doublet finding properties. */ @@ -157,6 +171,8 @@ TrigFastTrackFinder::TrigFastTrackFinder(const std::string& name, ISvcLocator* p declareProperty("useNewLayerNumberScheme", m_useNewLayerNumberScheme = false); + declareProperty("useGPU", m_useGPU = false); + // declare monitoring histograms } @@ -279,6 +295,28 @@ HLT::ErrorCode TrigFastTrackFinder::hltInitialize() { ATH_MSG_DEBUG(" Feature set recorded with Key " << m_attachedFeatureName); ATH_MSG_DEBUG(" doResMon " << m_doResMonitoring); + + if(m_useGPU) {//for GPU acceleration + sc = m_accelSvc.retrieve(); + if(sc.isFailure()) { + ATH_MSG_ERROR("Could not retrieve "<<m_accelSvc); + m_useGPU = false; + } + if(!m_accelSvc->isReady()) { + ATH_MSG_INFO("Acceleration service not ready - no GPU found"); + m_useGPU = false; + } + else { + sc = m_accelTool.retrieve(); + if(sc.isFailure()) { + ATH_MSG_ERROR("Could not retrieve "<<m_accelTool); + m_useGPU = false; + } + } + } + + ATH_MSG_INFO("Use GPU acceleration : "<<std::boolalpha<<m_useGPU); + ATH_MSG_DEBUG(" Initialized successfully"); return HLT::OK; } @@ -508,20 +546,31 @@ StatusCode TrigFastTrackFinder::findTracks(InDet::SiTrackMakerEventData_xk &trac mnt_timer_TripletMaking.start(); - TRIG_TRACK_SEED_GENERATOR seedGen(m_tcs); + std::vector<TrigInDetTriplet*> triplets; + + if(!m_useGPU) { - seedGen.loadSpacePoints(convertedSpacePoints); + TRIG_TRACK_SEED_GENERATOR seedGen(m_tcs); - if (m_doZFinder && m_doFastZVseeding) { - seedGen.createSeeds(tmpRoi.get(), vZv); + seedGen.loadSpacePoints(convertedSpacePoints); + + if (m_doZFinder && m_doFastZVseeding) { + seedGen.createSeeds(tmpRoi.get(), vZv); + } + else { + seedGen.createSeeds(tmpRoi.get()); + } + + seedGen.getSeeds(triplets); } else { - seedGen.createSeeds(tmpRoi.get()); - } + //GPU offloading begins ... - std::vector<TrigInDetTriplet*> triplets; - seedGen.getSeeds(triplets); + makeSeedsOnGPU(m_tcs, tmpRoi.get(), convertedSpacePoints, triplets); + //GPU offloading ends ... + } + ATH_MSG_DEBUG("number of triplets: " << triplets.size()); mnt_timer_TripletMaking.stop(); mnt_roi_lastStageExecuted = 4; @@ -1203,3 +1252,48 @@ void TrigFastTrackFinder::runResidualMonitoring(const Trk::Track& track) const { } } +void TrigFastTrackFinder::makeSeedsOnGPU(const TrigCombinatorialSettings& tcs, const IRoiDescriptor* roi, const std +::vector<TrigSiSpacePointBase>& vsp, std::vector<TrigInDetTriplet*>& output) const { + + output.clear(); + + TrigAccel::DATA_EXPORT_BUFFER* dataBuffer = new TrigAccel::DATA_EXPORT_BUFFER(5000);//i.e. 5KB + + size_t actualSize = m_accelTool->exportSeedMakingJob(tcs, roi, vsp, *dataBuffer); + + ATH_MSG_DEBUG("SeedMakingJob is ready, data size for transfer = " <<actualSize); + + std::shared_ptr<TrigAccel::OffloadBuffer> pBuff = std::make_shared<TrigAccel::OffloadBuffer>(dataBuffer); + + TrigAccel::Work* pJob = m_accelSvc->createWork(TrigAccel::InDetJobControlCode::MAKE_SEEDS, pBuff); + + if(pJob) { + ATH_MSG_DEBUG("Work item created for task "<<TrigAccel::InDetJobControlCode::MAKE_SEEDS); + + pJob->run(); + + + std::shared_ptr<TrigAccel::OffloadBuffer> pOB = pJob->getOutput(); + + TrigAccel::OUTPUT_SEED_STORAGE* pOutput = reinterpret_cast<TrigAccel::OUTPUT_SEED_STORAGE *>(pOB->m_rawBuffer); + + ATH_MSG_DEBUG("Found "<<pOutput->m_nSeeds<<" triplets on GPU"); + + int nTriplets = pOutput->m_nSeeds; + + //copy seeds into the output buffer + + output.clear(); + + for(int k=0;k<nTriplets;k++) { + const TrigSiSpacePointBase& SPi = vsp[pOutput->m_innerIndex[k]]; + const TrigSiSpacePointBase& SPm = vsp[pOutput->m_middleIndex[k]]; + const TrigSiSpacePointBase& SPo = vsp[pOutput->m_outerIndex[k]]; + TrigInDetTriplet* t = new TrigInDetTriplet(SPi, SPm, SPo, pOutput->m_Q[k]); + output.push_back(t); + } + } + + delete pJob; + delete dataBuffer; +} diff --git a/Trigger/TrigAlgorithms/TrigFastTrackFinder/src/TrigFastTrackFinder.h b/Trigger/TrigAlgorithms/TrigFastTrackFinder/src/TrigFastTrackFinder.h index 164f2e79b2fd5206bf02117bded7bb6717eaa154..b4e54a9b26e0bec3e6547918436040d82be68b57 100644 --- a/Trigger/TrigAlgorithms/TrigFastTrackFinder/src/TrigFastTrackFinder.h +++ b/Trigger/TrigAlgorithms/TrigFastTrackFinder/src/TrigFastTrackFinder.h @@ -39,7 +39,10 @@ class ITrigL2ResidualCalculator; class ITrigInDetTrackFitter; class ITrigZFinder; class TrigRoiDescriptor; +class TrigSiSpacePointBase; +class TrigInDetTriplet; class Identifier; + namespace InDet { class ISiTrackMaker; class SiTrackMakerEventData_xk; @@ -54,6 +57,11 @@ class PixelID; class SCT_ID; class AtlasDetectorID; +// for GPU acceleration + +class ITrigInDetAccelerationTool; +class ITrigInDetAccelerationSvc; + class TrigFastTrackFinder : public HLT::FexAlgo { public: @@ -99,6 +107,10 @@ protected: ToolHandle< Trk::ITrackSummaryTool > m_trackSummaryTool; ToolHandle< GenericMonitoringTool > m_monTool { this, "MonTool", "", "Monitoring tool" }; + //for GPU acceleration + ToolHandle<ITrigInDetAccelerationTool> m_accelTool; + ServiceHandle<ITrigInDetAccelerationSvc> m_accelSvc; + //DataHandles SG::ReadHandleKey<TrigRoiDescriptorCollection> m_roiCollectionKey; SG::WriteHandleKey<TrackCollection> m_outputTracksKey; @@ -164,6 +176,12 @@ protected: bool m_useNewLayerNumberScheme; + // GPU acceleration + + bool m_useGPU; + + void makeSeedsOnGPU(const TrigCombinatorialSettings&, const IRoiDescriptor*, const std::vector<TrigSiSpacePointBase>&, std::vector<TrigInDetTriplet*>&) const; + }; #endif // not TRIGFASTTRACKFINDER_TRIGFASTTRACKFINDER_H