Skip to content
Snippets Groups Projects

general code for hit sorting; sort UT hits by X

Merged Dorothea Vom Bruch requested to merge dovombru_hit_sorting into master
4 unresolved threads
  • sort UT hits by X within every layer
  • move general code that is needed by several algorithms into cuda/utils
  • make the sorting code reusable
Edited by Dorothea Vom Bruch

Merge request reports



Filter activity
  • Approvals
  • Assignees & reviewers
  • Comments (from bots)
  • Comments (from users)
  • Commits & branches
  • Edits
  • Labels
  • Lock status
  • Mentions
  • Merge request status
  • Tracking
39 * @brief Sort by var stored in sorting_vars, store index in hit_permutations
40 */
41 template<class T>
42 __host__ __device__
43 void findPermutation(
44 const T* sorting_vars,
45 const uint hit_start,
46 uint* hit_permutations,
47 const uint n_hits
48 ){
49 #ifdef __CUDA_ARCH__
50 for (unsigned int i = 0; i < (n_hits + blockDim.x - 1); ++i) {
51 const unsigned int hit_rel_index = i*blockDim.x + threadIdx.x;
52 if ( hit_rel_index < n_hits ) {
53 const int hit_index = hit_start + hit_rel_index;
54 const T var = sorting_vars[hit_index];
  • 58 for (unsigned int j = 0; j < n_hits; ++j) {
    59 const int other_hit_index = hit_start + j;
    60 const T other_var = sorting_vars[other_hit_index];
    61 // Stable sorting
    62 position += var > other_var || ( var == other_var && hit_rel_index > j );
    63 }
    64 assert(position < n_hits);
    66 // Store it in hit_permutations
    67 hit_permutations[hit_start + position] = hit_index;
    68 }
    69 }
    70 #else
    71 for (unsigned int i = 0; i < n_hits; ++i) {
    72 const int hit_index = hit_start + i;
    73 const T var = sorting_vars[hit_index];
  • 44 24 uint* new_hit_IDs = (uint*) hit_Zs;
    45 25
    46 26 // Apply permutation across all arrays
    47 apply_permutation(hit_permutations, event_hit_start, event_number_of_hits, hit_Xs, new_hit_Xs);
    27 applyPermutation(hit_permutations, event_hit_start, event_number_of_hits, hit_Xs, new_hit_Xs);
  • 293 293 cudaCheck(cudaMemcpyAsync(host_velo_states, argen.generate<arg::dev_velo_states>(argument_offsets), argen.size<arg::dev_velo_states>(host_number_of_reconstructed_velo_tracks[0]), cudaMemcpyDeviceToHost, stream));
    294 294 }
    295 295
    296 // VeloUT tracking
    296 // UT hit sorting by x
    297 297 argument_sizes[arg::dev_ut_hits] = argen.size<arg::dev_ut_hits>(number_of_events);
    298 argument_sizes[arg::dev_ut_hits_sorted] = argen.size<arg::dev_ut_hits_sorted>(number_of_events);
    299 argument_sizes[arg::dev_ut_hit_permutations] = argen.size<arg::dev_ut_hit_permutations>(number_of_events * VeloUTTracking::max_numhits_per_event);
    300 scheduler.setup_next(argument_sizes, argument_offsets, sequence_step++);
    301 cudaCheck(cudaMemcpyAsync(argen.generate<arg::dev_ut_hits>(argument_offsets), host_ut_hits_events, number_of_events * sizeof(VeloUTTracking::HitsSoA), cudaMemcpyHostToDevice, stream ));
  • added 3 commits

    • cc304dc4 - consistent with naming scheme for functions
    • c90aac24 - avoid copy
    • d5bfd701 - take size of container to copy from memory handler

    Compare with previous version

  • mentioned in commit 279b041c

  • Please register or sign in to reply