diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml new file mode 100644 index 0000000000000000000000000000000000000000..209fb9abdcb3900c3e43d92228209c7865627553 --- /dev/null +++ b/.gitlab-ci.yml @@ -0,0 +1,248 @@ +stages: + - build + - run + - publish + +.build_job: &build_job_def + only: + refs: + - master + - schedules + - web + + stage: build + script: + - declare -A DEVICE_NUMBERS_MAP=${DEVICE_NUMBERS} + - declare -A DEVICE_CAPABILITIES_MAP=${DEVICE_CAPABILITIES} + - PREVIOUS_IFS=${IFS} + - IFS=':' read -ra JOB_NAME_SPLIT <<< "${CI_JOB_NAME}" + - IFS=${PREVIOUS_IFS} + - SEQUENCE=${JOB_NAME_SPLIT[0]} + - BUILD_ARCH_FLAG="-gencode arch=compute_75,code=sm_75 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_52,code=sm_52 -gencode arch=compute_30,code=sm_30" + - export PATH=/usr/local/cuda/bin:/cvmfs/lhcb.cern.ch/lib/contrib/CMake/3.12.1/Linux-x86_64/bin/:$PATH + - source /cvmfs/lhcb.cern.ch/lib/lcg/releases/gcc/8.2.0/x86_64-centos7/setup.sh + - mkdir build + - cd build + - cmake -DSEQUENCE=${SEQUENCE} -DOVERRIDE_ARCH_FLAG="${BUILD_ARCH_FLAG}" -DCPU_ARCH="ivybridge" .. + - make -j + artifacts: + name: "$CI_JOB_NAME" + expire_in: 2 hrs + paths: + - build*/*Allen* + - input + retry: 1 + +.run_throughput_job: &run_throughput_job_def + only: + refs: + - master + - schedules + - web + stage: run + script: + - TOPLEVEL=${PWD} + - declare -A DEVICE_NUMBERS_MAP=${DEVICE_NUMBERS} + - declare -A DEVICE_CAPABILITIES_MAP=${DEVICE_CAPABILITIES} + - declare -A DEVICE_MEMORY_MAP=${DEVICE_MEMORY} + - PREVIOUS_IFS=${IFS} + - IFS=':' read -ra JOB_NAME_SPLIT <<< "${CI_JOB_NAME}" + - IFS=${PREVIOUS_IFS} + - DEVICE_ID=${JOB_NAME_SPLIT[0]} + - SEQUENCE=${JOB_NAME_SPLIT[1]} + - D_NUMBER=${DEVICE_NUMBERS_MAP[${DEVICE_ID}]} + - D_CAPABILITY=${DEVICE_CAPABILITIES_MAP[${DEVICE_ID}]} + - D_MEMORY=${DEVICE_MEMORY_MAP[${DEVICE_ID}]} + - RUN_OPTIONS="-n 1000 -r 100 -t 8 -c 0" + - if [ "${D_MEMORY}" = "LOW" ]; then + - RUN_OPTIONS="-n 1000 -r 100 -t 2 -m 700 -c 0" + - fi + - export PATH=/usr/local/cuda/bin:/cvmfs/lhcb.cern.ch/lib/contrib/CMake/3.12.1/Linux-x86_64/bin/:$PATH + - source /cvmfs/lhcb.cern.ch/lib/lcg/releases/gcc/8.2.0/x86_64-centos7/setup.sh + - mkdir output_${DEVICE_ID} + - cd build + - ls + - export LD_LIBRARY_PATH=${PWD}:$LD_LIBRARY_PATH + - CUDA_VISIBLE_DEVICES=${D_NUMBER} ./Allen -f /localprojects/shared/1kevents_minbias_UT_LHCbID_fix ${RUN_OPTIONS} 2>&1 | tee ../output_${DEVICE_ID}/output.txt + - CUDA_VISIBLE_DEVICES=${D_NUMBER} nvprof ./Allen -f /localprojects/shared/1kevents_minbias_UT_LHCbID_fix ${RUN_OPTIONS} 2>&1 | tee ../output_${DEVICE_ID}/profiler_output.txt + - python3 ${TOPLEVEL}/checker/plotting/extract_algo_breakdown.py -d ${TOPLEVEL} + artifacts: + name: "$CI_JOB_NAME" + expire_in: 2 hrs + paths: + - output_* + allow_failure: true + +.throughput_cli_plot_job: &publish_algo_breakdown_plot_def + only: + refs: + - master + - schedules + - web + stage: publish + script: + - declare -A DEVICE_NUMBERS_MAP=${DEVICE_NUMBERS} + - declare -A DEVICE_CAPABILITIES_MAP=${DEVICE_CAPABILITIES} + - PREVIOUS_IFS=${IFS} + - IFS=':' read -ra JOB_NAME_SPLIT <<< "${CI_JOB_NAME}" + - IFS=${PREVIOUS_IFS} + - DEVICE_ID=${JOB_NAME_SPLIT[0]} + - SEQUENCE=${JOB_NAME_SPLIT[1]} + - D_NUMBER=${DEVICE_NUMBERS_MAP[${DEVICE_ID}]} + - D_CAPABILITY=${DEVICE_CAPABILITIES_MAP[${DEVICE_ID}]} + - export PATH=/usr/local/cuda/bin:/cvmfs/lhcb.cern.ch/lib/contrib/CMake/3.12.1/Linux-x86_64/bin/:$PATH + - source /cvmfs/lhcb.cern.ch/lib/lcg/releases/gcc/8.2.0/x86_64-centos7/setup.sh + - python3 
checker/plotting/csv_plotter.py -t "Algorithm Breakdown for ${SEQUENCE}" -u "%" -x 40 -m ${MATTERMOST_KEY} output_${DEVICE_ID}/algo_breakdown.csv + - python3 checker/plotting/csv_plotter.py -t "Algorithm Groups for ${SEQUENCE}" -u "%" -m ${MATTERMOST_KEY} output_${DEVICE_ID}/algo_summary.csv + +.throughput_throughput_job: &publish_throughput_job_def + only: + refs: + - master + - schedules + - web + stage: publish + script: + - PREVIOUS_IFS=${IFS} + - IFS=':' read -ra JOB_NAME_SPLIT <<< "${CI_JOB_NAME}" + - IFS=${PREVIOUS_IFS} + - SEQUENCE=${JOB_NAME_SPLIT[1]} + - cat output_*/output.txt | grep --color=none "device" | sed 's/.*:\ //' > devices.txt + - cat output_*/output.txt | grep --color=none "events/s" | awk '{ print $1; }' > throughputs.txt + - cat devices.txt + - cat throughputs.txt + - paste -d, devices.txt throughputs.txt > devices_throughputs.csv + - cat devices_throughputs.csv + - python3 checker/plotting/csv_plotter.py -t "Throughputs for ${SEQUENCE}" -u "kHz" -x 70 -s 1e-3 -m ${MATTERMOST_KEY} devices_throughputs.csv + - python3 checker/plotting/post_telegraf.py -d . -s ${SEQUENCE} -b ${CI_COMMIT_REF_NAME} + + +.throughput_speedup_job: &publish_speedup_job_def + only: + refs: + - master + - schedules + - web + stage: publish + script: + - cat output_*/output.txt | grep --color=none "device" | sed 's/.*:\ //' > devices.txt + - cat output_*/output.txt | grep --color=none "events/s" | awk '{ print $1; }' > throughputs.txt + - cat devices.txt + - cat throughputs.txt + - paste -d, devices.txt throughputs.txt > devices_throughputs.csv + - cat devices_throughputs.csv + - python3 checker/plotting/csv_plotter.py -n -t "Speedup across GPUs" -u "x" -x 30 -m ${MATTERMOST_KEY} devices_throughputs.csv + +# Build on all platforms +LookingForwardKalman:build: + <<: *build_job_def + tags: + - t4 + +# Run on all platforms +rtx2080ti:LookingForwardKalman:run_throughput: + <<: *run_throughput_job_def + tags: + - rtx2080ti + dependencies: + - LookingForwardKalman:build + +v100:LookingForwardKalman:run_throughput: + <<: *run_throughput_job_def + tags: + - v100 + dependencies: + - LookingForwardKalman:build + +t4:LookingForwardKalman:run_throughput: + <<: *run_throughput_job_def + tags: + - t4 + dependencies: + - LookingForwardKalman:build + +gtx10606g:LookingForwardKalman:run_throughput: + <<: *run_throughput_job_def + tags: + - gtx10606g + dependencies: + - LookingForwardKalman:build + +gtx980:LookingForwardKalman:run_throughput: + <<: *run_throughput_job_def + tags: + - gtx980 + dependencies: + - LookingForwardKalman:build + +gtx680:LookingForwardKalman:run_throughput: + <<: *run_throughput_job_def + tags: + - gtx680 + dependencies: + - LookingForwardKalman:build + +gtxtitanx:LookingForwardKalman:run_throughput: + <<: *run_throughput_job_def + tags: + - gtxtitanx + dependencies: + - LookingForwardKalman:build + +gtx670:LookingForwardKalman:run_throughput: + <<: *run_throughput_job_def + tags: + - gtx670 + dependencies: + - LookingForwardKalman:build + +gtx1080ti:LookingForwardKalman:run_throughput: + <<: *run_throughput_job_def + tags: + - gtx1080ti + dependencies: + - LookingForwardKalman:build + +# Publish +# Plain results to mattermost + +# TODO: Make an intermediate stage for this job, modify script to +# support the nvprof output as input +rtx2080ti:LookingForwardKalman:publish_algo_breakdown_plot: + <<: *publish_algo_breakdown_plot_def + tags: + - gpu + dependencies: + - rtx2080ti:LookingForwardKalman:run_throughput + +throughput:LookingForwardKalman:publish_throughput: + <<: 
*publish_throughput_job_def
+  tags:
+    - gpu
+  dependencies:
+    - rtx2080ti:LookingForwardKalman:run_throughput
+    - v100:LookingForwardKalman:run_throughput
+    - t4:LookingForwardKalman:run_throughput
+    - gtx10606g:LookingForwardKalman:run_throughput
+    - gtx680:LookingForwardKalman:run_throughput
+    - gtxtitanx:LookingForwardKalman:run_throughput
+    - gtx670:LookingForwardKalman:run_throughput
+    - gtx1080ti:LookingForwardKalman:run_throughput
+    - gtx980:LookingForwardKalman:run_throughput
+
+# The "gpu" tag is to require python3 essentially
+speedup:LookingForwardKalman:publish_speedup:
+  <<: *publish_speedup_job_def
+  tags:
+    - gpu
+  dependencies:
+    - rtx2080ti:LookingForwardKalman:run_throughput
+    - v100:LookingForwardKalman:run_throughput
+    - t4:LookingForwardKalman:run_throughput
+    - gtx10606g:LookingForwardKalman:run_throughput
+    - gtx680:LookingForwardKalman:run_throughput
+    - gtx980:LookingForwardKalman:run_throughput
+    - gtxtitanx:LookingForwardKalman:run_throughput
+    - gtx670:LookingForwardKalman:run_throughput
+    - gtx1080ti:LookingForwardKalman:run_throughput
diff --git a/CMakeLists.txt b/CMakeLists.txt
index ab5940b3ea100f8b94fe391ff1e65c3ed87efa34..557106b8a2708038e4f39fd98b6ce0a387b2245e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -31,8 +31,13 @@ if(NOT SEQUENCE)
   set(SEQUENCE DefaultSequence)
 endif(NOT SEQUENCE)
 
+if(NOT DEFINED CPU_ARCH)
+  set(CPU_ARCH native)
+endif()
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=${CPU_ARCH}")
+MESSAGE(STATUS "CPU_ARCH: " ${CPU_ARCH})
+
 set(CMAKE_CXX_STANDARD 17)
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native")
 set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG")
 set(CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG")
 set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g -DDEBUG")
@@ -40,7 +45,9 @@ set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -lpthread")
 set(CMAKE_POSITION_INDEPENDENT_CODE ON)
 
 # Configuration of CUDA compute architecture
-set(CUDA_ARCH "MAX" CACHE STRING "Cuda architecture")
+if(NOT DEFINED CUDA_ARCH)
+  set(CUDA_ARCH "MAX" CACHE STRING "Cuda architecture")
+endif()
 
 if (CUDA_ARCH STREQUAL "MIN" OR CUDA_ARCH STREQUAL "MAX" OR CUDA_ARCH STREQUAL "COMP")
   set(OUTPUTFILE ${PROJECT_BINARY_DIR}/cuda_arch) # No suffix required
@@ -69,6 +76,9 @@ else()
   message(STATUS "Cuda architecture manually set to ${CUDA_ARCH}")
   set(ARCH_FLAG "-arch=${CUDA_ARCH}")
 endif()
+if(DEFINED OVERRIDE_ARCH_FLAG)
+  set(ARCH_FLAG ${OVERRIDE_ARCH_FLAG})
+endif()
 
 find_package(ZLIB REQUIRED)
 
diff --git a/checker/plotting/csv_plotter.py b/checker/plotting/csv_plotter.py
new file mode 100755
index 0000000000000000000000000000000000000000..f2fec37628ecf2846b428cec7c994406ad15947e
--- /dev/null
+++ b/checker/plotting/csv_plotter.py
@@ -0,0 +1,111 @@
+#!/usr/bin/python3
+import csv
+import subprocess
+import traceback
+from optparse import OptionParser
+from termgraph import TermGraph
+
+
+def format_text(title, plot_data, options):
+    # Prepare data
+    final_vals = []
+    final_tags = []
+
+    keylist = sorted(plot_data.keys(),
+                     key=lambda x: plot_data[x],
+                     reverse=True)
+    for k in keylist:
+        val = plot_data[k]
+        final_tags.append(k)
+        final_vals.append(val)
+
+    # Plot
+    print(final_tags)
+    print(final_vals)
+    tg = TermGraph(suffix=options.unit, x_max=options.x_max)
+    output = tg.chart(final_vals, final_tags)
+
+    text = '{"text": "%s\n```\n%s```"}' % (title, output)
+    return text
+
+
+def send_to_mattermost(text, mattermost_url):
+    subprocess.call([
+        "curl", "-i", "-X", "POST", "-H", 'Content-Type: application/json',
+        "-d", text, mattermost_url
mattermost_url + ]) + + +""" +Produces a plot of the performance breakdown of the sequence under execution +""" + + +def main(): + usage = '%prog [options] <-d data_file>\n' + \ + 'Example: %prog -d data.csv -m "http://{your-mattermost-site}/hooks/xxx-generatedkey-xxx"' + parser = OptionParser(usage=usage) + parser.add_option( + '-m', + '--mattermost_url', + dest='mattermost_url', + help='The url where to post outputs generated for mattermost') + parser.add_option( + '-u', + '--unit', + dest='unit', + default='', + help='A unit suffix to append to evey value. Default is an empty string' + ) + parser.add_option( + '-x', + '--x_max', + dest='x_max', + default=50, + type=float, + help='Graph X axis is at least this many units wide. (default=50)') + parser.add_option('-t', + '--title', + dest='title', + default='', + help='Title for your graph. (default: empty string)') + parser.add_option( + '-s', + '--scale', + dest='scale', + default=1.0, + type=float, + help='Multiply all data values by this number (default=1.0)') + parser.add_option( + '-n', + '--normalize', + dest='normalize', + action='store_true', + default=False, + help='Scale numbers according to lowest value (default: False)') + + (options, args) = parser.parse_args() + + plot_data = {} + with open(args[0]) as csvfile: + csv_reader = csv.reader(csvfile, delimiter=',') + for row in csv_reader: + try: + plot_data[row[0]] = float(row[1]) * options.scale + except: + print(traceback.format_exc()) + + # Convert throughputs to speedups + if options.normalize: + norm = min(plot_data.values()) + for k in plot_data.keys(): + plot_data[k] /= norm + + text = format_text(options.title, plot_data, options) + print(text) + if options.mattermost_url is not None: + send_to_mattermost(text, options.mattermost_url) + + +if __name__ == "__main__": + main() diff --git a/checker/plotting/extract_algo_breakdown.py b/checker/plotting/extract_algo_breakdown.py new file mode 100755 index 0000000000000000000000000000000000000000..1ebaab9519ec5781fce6bad2f04b1fed48c8f042 --- /dev/null +++ b/checker/plotting/extract_algo_breakdown.py @@ -0,0 +1,107 @@ +#!/usr/bin/python3 +import sys +import os +import re +import traceback +import operator +import csv +from group_algos import group_algos +from optparse import OptionParser + +""" +Produces a plot of the performance breakdown of the sequence under execution +""" +def main(argv): + global final_msg + parser = OptionParser() + parser.add_option('-d', '--dir', dest='output_directory', help='The directory to scan for build_* directories') + parser.add_option('-f', '--file_pattern', dest='file_pattern', default='profiler_output.txt', + help='The file name to look for profiler data in each build_ directoy. 
default: profiler_output.txt') + + (options, args) = parser.parse_args() + + if options.output_directory is None: + parser.print_help() + print('Please specify an input directory') + return + + try: + dirs = [] + files = os.listdir(options.output_directory) + + except: + print('Failed to read profiler output directory: %s' % options.dir) + traceback.print_exc() + return + + dirs = [] + for file in files: + if file.startswith('output_'): + dirs.append(file) + + for dir in dirs: + filepath = options.output_directory +"/" + dir + "/" + options.file_pattern + try: + f = open(filepath) + s = f.read() + f.close() + except: + print('Error while trying to read profiler file: %s' % filepath) + traceback.print_exc() + continue + + # Fetch all timings into timings[0] + start_s = "GPU activities:" + end_s = "API calls:" + timings = re.findall(start_s + "(.*?)" + end_s, s, re.DOTALL) + try: + perf = re.findall("([0-9]+\.[0-9]+) events\/s", s)[0] + perf = float(perf) + except: + print('Failed to read performance data from output') + print(traceback.format_exc()) + + try: + runtime = re.findall("Ran test for ([0-9]+\.[0-9]+) seconds", s)[0] + runtime = float(runtime) + except: + print('Failed to read runtime from output') + print(traceback.format_exc()) + + + # Regexp for one line + # Note: An algorithm line looks like: + # 11.08% 6.47377s 700 9.2482ms 3.7639ms 15.887ms lf_search_uv_windows(unsigned int const *, unsigned int const *, int const *, SciFi::TrackHits const *, int const *, char const *, LookingForward::Constants const *, float const *, MiniState const *, short*) + # Note: Intended behaviour: Does *not* match nvidia calls like: + # 0.04% 20.484ms 9100 2.2500us 832ns 16.255us [CUDA memcpy DtoH] + regexp_expression = ".*?([0-9]+\.[0-9]+)\%.*[um]s ([a-zA-Z][a-zA-Z\_0-9]+).*" + + algorithm_times = {} + + for line in timings[0].split("\n"): + m = re.match(regexp_expression, line) + if m: + algorithm_times[m.group(2)] = float(m.group(1)) + + output_list = sorted(algorithm_times.items(), key=operator.itemgetter(1), reverse=True) + + print(output_list) + + output_path = options.output_directory +"/" + dir + "/algo_breakdown.csv" + with open(output_path, 'w') as out: + csv_out = csv.writer(out) + for row in output_list: + csv_out.writerow(row) + + timings = group_algos(algorithm_times) + print(timings) + + output_path = options.output_directory +"/" + dir + "/algo_summary.csv" + with open(output_path, 'w') as out: + csv_out = csv.writer(out) + for row in timings: + csv_out.writerow(row) + +if __name__ == "__main__": + main(sys.argv[1:]) + diff --git a/checker/plotting/group_algos.py b/checker/plotting/group_algos.py new file mode 100644 index 0000000000000000000000000000000000000000..80ed6e957919b831ff6ceed37477343eef559530 --- /dev/null +++ b/checker/plotting/group_algos.py @@ -0,0 +1,36 @@ +#!/usr/bin/python3 + +import operator + +def group_algos(algorithm_times): + # Algorithms of each sequence + velo_algorithms = ["consolidate_velo_tracks", "copy_velo_track_hit_number", "estimate_input_size", "masked_velo_clustering", "calculate_phi_and_sort", "search_by_triplet", "fill_candidates", "weak_tracks_adder", "copy_and_prefix_sum_single_block"] + pv_algorithms = ["pv_beamline_peak", "pv_beamline_multi_fitter", "pv_beamline_histo", "pv_beamline_extrapolate"] + ut_algorithms = ["consolidate_ut_tracks", "copy_ut_track_hit_number", "ut_decode_raw_banks_in_order", "ut_pre_decode", "ut_find_permutation", "ut_calculate_number_of_hits", "compass_ut", "ut_search_windows"] + scifi_algorithms = 
["scifi_pre_decode_v4", "scifi_raw_bank_decoder_v4", "scifi_calculate_cluster_count_v4", "scifi_direct_decoder_v4", "consolidate_scifi_tracks", "copy_scifi_track_hit_number", \ + "lf_search_initial_windows", "lf_collect_candidates", "lf_prefix_sum_candidates", "lf_triplet_seeding", "lf_triplet_keep_best", "lf_extend_tracks_x", \ + "lf_quality_filter_x", "lf_search_uv_windows", "lf_extend_tracks_uv", "lf_quality_filter_length", "lf_fit", "lf_quality_filter"] + kalman_algorithms = ["velo_filter", "velo_kalman_fit"] + # Order of labels + labels_order = ["Velo", "PV", "UT", "SciFi", "Kalman", "Common"] + timings = {"Velo": {"algorithms": velo_algorithms, "value": 0}, + "PV": {"algorithms": pv_algorithms, "value": 0}, + "UT": {"algorithms": ut_algorithms, "value": 0}, + "SciFi": {"algorithms": scifi_algorithms, "value": 0}, + "Kalman": {"algorithms": kalman_algorithms, "value": 0}, + "Common": {"algorithms": [], "value": 0}, + } + full_addition = sum(algorithm_times.values()) + for algo, value in algorithm_times.items(): + found = False + for key, algorithm_timing in timings.items(): + algorithms = algorithm_timing["algorithms"] + if algo in algorithms: + timings[key]["value"] += 100 * value / full_addition + found = True + break + if not found: + timings["Common"]["value"] += 100 * value / full_addition + simple_timings = {k:v["value"] for k,v in timings.items()} + output_list = sorted(simple_timings.items(), key=operator.itemgetter(1), reverse=True) + return output_list diff --git a/checker/plotting/performance_breakdown.py b/checker/plotting/performance_breakdown.py index f92c5d8c6f556d25e30beeb3da9df664c145e784..9e3b1b675baa6eb16d60d959fd2a793baa2ddb2b 100755 --- a/checker/plotting/performance_breakdown.py +++ b/checker/plotting/performance_breakdown.py @@ -74,7 +74,7 @@ def main(argv): pv_algorithms = ["pv_beamline_peak", "pv_beamline_multi_fitter", "pv_beamline_histo", "pv_beamline_extrapolate"] ut_algorithms = ["consolidate_ut_tracks", "copy_ut_track_hit_number", "ut_decode_raw_banks_in_order", "ut_pre_decode", "ut_find_permutation", "ut_calculate_number_of_hits", "compass_ut", "ut_search_windows"] scifi_algorithms = ["scifi_pre_decode_v4", "scifi_raw_bank_decoder_v4", "scifi_calculate_cluster_count_v4", "scifi_direct_decoder_v4", "consolidate_scifi_tracks", "copy_scifi_track_hit_number"] - kalman_algorithms = ["KalmanFilter"] + kalman_algorithms = ["velo_kalman", "velo_filter"] # Convert values to percentages full_addition = sum(algorithm_times.values()) diff --git a/checker/plotting/post_telegraf.py b/checker/plotting/post_telegraf.py new file mode 100755 index 0000000000000000000000000000000000000000..e963c156efec705dddff044a77d73c52335d366b --- /dev/null +++ b/checker/plotting/post_telegraf.py @@ -0,0 +1,153 @@ +#!/usr/bin/python3 +import sys +import os +import re +import traceback +import operator +import csv +import requests +import time +from optparse import OptionParser +from group_algos import group_algos + + +def send_to_telegraf(performance, runtime, timings, device, options): + session = requests.session() + session.trust_env = False + now = time.time() + timestamp = int(now) * 1000000000 + + telegraf_string = "AllenCIPerformance,branch=%s,device=%s,sequence=%s " % (options.branch, device, options.sequence) + for label,timing in timings: + print(label, timing) + telegraf_string += '{}={:.2f},'.format(label,timing) + + telegraf_string += "performance=%.2f,runtime=%.2f " % (performance, runtime) + telegraf_string += " %d" % timestamp + + try: + print('Sending telegraf string: 
%s' % telegraf_string) + response = session.post(options.telegraf_url, data=telegraf_string) + #print('http response: %s' % response.headers) + except: + print('Failed to submit data string %s' % telegraf_string) + print(traceback.format_exc()) + + +""" +Produces a plot of the performance breakdown of the sequence under execution +""" +def main(argv): + global final_msg + parser = OptionParser() + parser.add_option('-d', '--dir', dest='output_directory', help='The directory to scan for build_* directories') + parser.add_option('-f', '--file_pattern', dest='file_pattern', default='profiler_output.txt', + help='The file name to look for profiler data in each build_ directoy. default: profiler_output.txt') + parser.add_option('-b', '--branch', dest='branch', default = 'UNKNOWN', help='branch tag to be forwarded to telegraf/grafana') + parser.add_option('-s', '--sequence', dest='sequence', default = 'UNKNOWN', help='sequence name tag to be forwarded to telegraf/grafana') + parser.add_option('-t', '--telegraf_url', dest='telegraf_url', default = 'http://localhost:8186/telegraf', help='URL to send telegraf output to') + + + (options, args) = parser.parse_args() + + if options.output_directory is None: + parser.print_help() + print('Please specify an input directory') + return + + try: + dirs = [] + files = os.listdir(options.output_directory) + + except: + print('Failed to read profiler output directory: %s' % options.dir) + traceback.print_exc() + return + + dirs = [] + for file in files: + if file.startswith('output_'): + dirs.append(file) + + for d in dirs: + filepath = options.output_directory +"/" + d + "/" + options.file_pattern + try: + f = open(filepath) + s = f.read() + f.close() + except: + print('Error while trying to read profiler file: %s' % filepath) + traceback.print_exc() + continue + + try: + device = d.split('_')[1] + except: + traceback.print_exc() + print('Could not extract device name from directory name: %s' % d) + continue + + # Fetch all timings into timings[0] + start_s = "GPU activities:" + end_s = "API calls:" + timings = re.findall(start_s + "(.*?)" + end_s, s, re.DOTALL) + try: + perf = re.findall("([0-9]+\.[0-9]+) events\/s", s)[0] + perf = float(perf) + except: + print('Failed to read performance data from output') + print(traceback.format_exc()) + + try: + runtime = re.findall("Ran test for ([0-9]+\.[0-9]+) seconds", s)[0] + runtime = float(runtime) + except: + print('Failed to read runtime from output') + print(traceback.format_exc()) + + + # Regexp for one line + # Note: An algorithm line looks like: + # 11.08% 6.47377s 700 9.2482ms 3.7639ms 15.887ms lf_search_uv_windows(unsigned int const *, unsigned int const *, int const *, SciFi::TrackHits const *, int const *, char const *, LookingForward::Constants const *, float const *, MiniState const *, short*) + # Note: Intended behaviour: Does *not* match nvidia calls like: + # 0.04% 20.484ms 9100 2.2500us 832ns 16.255us [CUDA memcpy DtoH] + regexp_expression = ".*?([0-9]+\.[0-9]+)\%.*[um]s ([a-zA-Z][a-zA-Z\_0-9]+).*" + + algorithm_times = {} + + for line in timings[0].split("\n"): + m = re.match(regexp_expression, line) + if m: + algorithm_times[m.group(2)] = float(m.group(1)) + + output_list = sorted(algorithm_times.items(), key=operator.itemgetter(1), reverse=True) + print('Algorithm Times:') + print(output_list) + + try: + perf = re.findall("([0-9]+\.[0-9]+) events\/s", s)[0] + perf = float(perf) + except: + print('Failed to read performance data from output') + print(traceback.format_exc()) + + try: + runtime = 
re.findall("Ran test for ([0-9]+\.[0-9]+) seconds", s)[0] + runtime = float(runtime) + except: + print('Failed to read runtime from output') + print(traceback.format_exc()) + + print('Algorithm Group Times:') + timings = group_algos(algorithm_times) + + print(timings) + + print('Throughput: %.2f' % (perf)) + print('Runtime: %.2f' % (runtime)) + + send_to_telegraf(perf, runtime, timings, device, options) + +if __name__ == "__main__": + main(sys.argv[1:]) + diff --git a/checker/plotting/speedup_cli_plot.py b/checker/plotting/speedup_cli_plot.py new file mode 100755 index 0000000000000000000000000000000000000000..0a2314f0b01917354aba5d23e58cb4794fa80412 --- /dev/null +++ b/checker/plotting/speedup_cli_plot.py @@ -0,0 +1,230 @@ +#!/usr/bin/python3 +import csv +import subprocess +import os +import re + +####################################################################### +# From termgraph +# https://github.com/mkaz/termgraph/ +####################################################################### + +tg_width = 50 +tg_format = '{:<4.2f}' +DELIM = ',' +TICK = 'â–‡' +SM_TICK = 'â–' + + +def find_max_label_length(labels): + """Return the maximum length for the labels.""" + length = 0 + for i in range(len(labels)): + if len(labels[i]) > length: + length = len(labels[i]) + + return length + + +def normalize(data, width): + """Normalize the data and return it.""" + # min_dat = find_min(data) + min_dat = data[-1] + # We offset by the minimum if there's a negative. + off_data = [] + if min_dat < 0: + min_dat = abs(min_dat) + for dat in data: + off_data.append([_d + min_dat for _d in dat]) + else: + off_data = data + # min_dat = find_min(off_data) + # max_dat = find_max(off_data) + min_dat = off_data[-1] + max_dat = off_data[0] + + if max_dat < width: + # Don't need to normalize if the max value + # is less than the width we allow. + return off_data + + # max_dat / width is the value for a single tick. norm_factor is the + # inverse of this value + # If you divide a number to the value of single tick, you will find how + # many ticks it does contain basically. + norm_factor = width / float(max_dat) + normal_dat = [] + for dat in off_data: + normal_dat.append([_v * norm_factor for _v in dat]) + + return normal_dat + + +def horiz_rows(labels, data, normal_dat): + global final_msg + """Prepare the horizontal graph. + Each row is printed through the print_row function.""" + # val_min = find_min(data) + val_min = data[-1] + + for i in range(len(labels)): + label = "{:<{x}}: ".format(labels[i], x=find_max_label_length(labels)) + + values = data[i] + num_blocks = normal_dat[i] + + for j in range(1): + # In Multiple series graph 1st category has label at the beginning, + # whereas the rest categories have only spaces. + if j > 0: + len_label = len(label) + label = ' ' * len_label + tail = ' {}'.format(tg_format.format(values)) + color = None + # print(label, end="") + final_msg += label + yield (values, int(num_blocks), val_min, color) + final_msg += tail + 'x\n' + + +# Prints a row of the horizontal graph. +def print_row(value, num_blocks, val_min, colors): + global final_msg + """A method to print a row for a horizontal graphs. + + i.e: + 1: ▇▇ 2 + 2: ▇▇▇ 3 + 3: ▇▇▇▇ 4 + """ + + if num_blocks < 1 and (value > val_min or value > 0): + # Print something if it's not the smallest + # and the normal value is less than one. 
+ # sys.stdout.write(SM_TICK) + # print(SM_TICK, end="") + final_msg += SM_TICK + else: + for _ in range(num_blocks): + # sys.stdout.write(TICK) + # print(TICK, end="") + final_msg += TICK + + +def chart(data, labels): + # One category/Multiple series graph with same scale + # All-together normalization + normal_dat = normalize(data, tg_width) + for row in horiz_rows(labels, data, normal_dat): + print_row(*row) + + +####################################################################### +# Finish termgraph +####################################################################### + +import traceback +from optparse import OptionParser +from termgraph import TermGraph + + +def format_text(title, algorithm_times, options): + # Prepare data + final_vals = [] + final_tags = [] + + keylist = sorted(algorithm_times.keys(), + key=lambda x: algorithm_times[x], + reverse=True) + for k in keylist: + val = algorithm_times[k] + final_tags.append(k) + final_vals.append(val) + + # Plot + print(final_tags) + print(final_vals) + tg = TermGraph(suffix=options.unit, x_max=options.x_max) + final_msg = tg.chart(final_vals, final_tags) + + text = '{"text": "%s\n```\n%s```"}' % (title, final_msg) + return text + + +def send_to_mattermost(text, mattermost_url): + subprocess.call([ + "curl", "-i", "-X", "POST", "-H", 'Content-Type: application/json', + "-d", text, mattermost_url + ]) + + +""" +Produces a plot of the performance breakdown of the sequence under execution +""" + + +def main(): + usage = '%prog [options] <-d data_file>\n' + \ + 'Example: %prog -d data.csv -m "http://{your-mattermost-site}/hooks/xxx-generatedkey-xxx"' + parser = OptionParser(usage=usage) + parser.add_option( + '-m', + '--mattermost_url', + dest='mattermost_url', + help='The url where to post outputs generated for mattermost') + parser.add_option('-d', + '--data_file', + dest='data_file', + help='Path to a data file to plot') + parser.add_option( + '-u', + '--unit', + dest='unit', + default='', + help='A unit suffix to append to evey value. Default is an empty string' + ) + parser.add_option( + '-x', + '--x_max', + dest='x_max', + default=50, + help='Graph X axis is at least this many units wide. (default=50)') + parser.add_option('-t', + '--title', + dest='title', + default='', + help='Title for your graph. (default: empty string)') + (options, args) = parser.parse_args() + + if options.data_file is None: + parser.print_help() + + try: + options.x_max = float(options.x_max) + except: + parser.print_help() + print('\n-x has to be a convertible floating point value!\n') + return -1 + + algorithm_times = {} + with open(options.data_file) as csvfile: + csv_reader = csv.reader(csvfile, delimiter=',') + for row in csv_reader: + try: + algorithm_times[row[0]] = float(row[1]) + except: + print(traceback.format_exc()) + + # Convert throughputs to speedups + base_speed = min(algorithm_times.values()) + for k in algorithm_times.keys(): + algorithm_times[k] /= base_speed + + text = format_text(options.title, algorithm_times, options) + print(text) + if options.mattermost_url is not None: + send_to_mattermost(text, options.mattermost_url) + + +if __name__ == "__main__": + main() diff --git a/checker/plotting/speedup_sample.dat b/checker/plotting/speedup_sample.dat new file mode 100644 index 0000000000000000000000000000000000000000..688ec80bec741af64355e6131b3441f042d2db9d --- /dev/null +++ b/checker/plotting/speedup_sample.dat @@ -0,0 +1,8 @@ +!!SAMPLE!! GTX 1060 6GB,7519.354825 +!!SAMPLE!! GTX 1080 Ti,18982.238479 +!!SAMPLE!! 
GTX 670,2297.042902 +!!SAMPLE!! GTX 680,2524.368385 +!!SAMPLE!! GTX TITAN X,11117.198792 +!!SAMPLE!! RTX 2080 Ti,43705.972936 +!!SAMPLE!! T4,22077.601157 +!!SAMPLE!! V100-PCIE-32GB,47568.303480 diff --git a/checker/plotting/termgraph.py b/checker/plotting/termgraph.py new file mode 100755 index 0000000000000000000000000000000000000000..857ade16ee33472b482cdf80224780706e8da167 --- /dev/null +++ b/checker/plotting/termgraph.py @@ -0,0 +1,203 @@ +#!/usr/bin/python3 + +import math + +####################################################################### +# From termgraph +# https://github.com/mkaz/termgraph/ +####################################################################### + + +class TermGraph: + def __init__(self, tg_width=50, tg_format='{:<4.2f}', delim=',', tick='â–ˆ', sm_tick='â–Œ', suffix="", x_max = 50): + self.tg_width = tg_width + self.tg_format = tg_format + self.DELIM = delim + self.TICK = tick + self.SM_TICK = sm_tick + self.text = "" + self.suffix = suffix + self.x_max = x_max + self.big_tick = "┼" + self.small_tick = "â”´" + self.horiz = "─" + self.label_length = 0 + + def find_max_label_length(self, labels): + """Return the maximum length for the labels.""" + length = 0 + for i in range(len(labels)): + if len(labels[i]) > length: + length = len(labels[i]) + + self.label_length = length + return length # Yes, yes, i know ... + + def getScale(self, data): + # min_dat = find_min(data) + self.min_dat = min(data) + if max(data) < self.x_max: + self.max_dat = self.x_max + else: + self.max_dat = max(data) + #epsilon = (maximum - minimum) / 1e6 + #maximum += epsilon + #minimum -= epsilon + rr = self.max_dat - self.min_dat + stepCount = 10 + roughStep = rr / (stepCount -1) + + goodNormalizedSteps = [1, 2, 5, 10] + stepPower = math.pow(10, -math.floor(math.log10(abs(roughStep)))) + normalizedStep = roughStep * stepPower + goodNormalizedStep = list(filter(lambda x: x > normalizedStep, goodNormalizedSteps))[0] + self.step = int(goodNormalizedStep / stepPower) + self.scaleMax = int(math.ceil(self.max_dat / self.step) * self.step) + self.scaleMin = int(math.floor(self.min_dat / self.step) * self.step) + self.strlen = max(len(str(int(self.scaleMin))), len(str(int(self.scaleMax)))) + print(self.strlen) + self.nSteps = int((self.scaleMax - self.scaleMin) / self.step) + print(self.scaleMin, self.scaleMax, self.step, self.nSteps) + + self.tick_dist = int(self.tg_width / (self.scaleMax - self.scaleMin) * self.step / 2) + print(self.tick_dist) + + self.tg_width = int(self.tick_dist * 2 * self.nSteps) + print('Updating tg_width to: %d' % self.tg_width) + return + + def numLen(self, num): + return len(str(int(num))) + + def printAxis(self): + self.text += " " * (self.label_length + 1) + self.text += self.big_tick + + for i in range(0, self.nSteps * 2): + self.text += self.horiz * int(self.tick_dist - 1) + if i % 2 == 0: + self.text += self.small_tick + else: + self.text += self.big_tick + + self.text += "\n" + + l = self.numLen(self.scaleMin) + l = int(l/2) + self.text += " " * (self.label_length - l - self.tick_dist + 2) + for i in range(self.scaleMin, self.scaleMax + self.step, self.step): + self.text += '{:^{width}}'.format(str(i), width = '%d' % (self.tick_dist * 2)) + self.text += "\n" + + def normalize(self, data, width): + """Normalize the data and return it.""" + + # We offset by the minimum if there's a negative. 
+ off_data = [] + if self.min_dat < 0: + self.min_dat = abs(self.min_dat) + for dat in data: + off_data.append(self.min_dat + dat) + else: + off_data = data + #self.max_dat += abs(self.min_dat) + + #if self.max_dat < self.x_max: + # Don't need to normalize if the max value + # is less than the width we allow. + #return off_data + # self.max_dat = self.x_max + + # max_dat / width is the value for a single tick. norm_factor is the + # inverse of this value + # If you divide a number to the value of single tick, you will find how + # many ticks it does contain basically. + print('width: %d, max_dat: %f' % (width, self.scaleMax)) + norm_factor = width / float(self.scaleMax) + normal_dat = [] + for dat in off_data: + normal_dat.append(dat * norm_factor) + + + return normal_dat + + def horiz_rows(self, labels, data, normal_dat): + """Prepare the horizontal graph. + Each row is printed through the print_row function.""" + val_min = min(data) + + for i in range(len(labels)): + label = "{:<{x}} │".format(labels[i], x=self.find_max_label_length(labels)) + + values = data[i] + num_blocks = normal_dat[i] + + for j in range(1): + # In Multiple series graph 1st category has label at the beginning, + # whereas the rest categories have only spaces. + if j > 0: + len_label = len(label) + label = ' ' * len_label + tail = ' {} %s'.format(self.tg_format.format(values)) % self.suffix + color = None + # print(label, end="") + self.text += label + yield(values, int(num_blocks), val_min, color) + self.text += tail + '\n' + + # Prints a row of the horizontal graph. + + def print_row(self, value, num_blocks, val_min, color): + """A method to print a row for a horizontal graphs. + + i.e: + 1: ▇▇ 2 + 2: ▇▇▇ 3 + 3: ▇▇▇▇ 4 + """ + + if num_blocks < 1 and (value >= val_min or value > 0): + # Print something if it's not the smallest + # and the normal value is less than one + # sys.stdout.write(SM_TICK) + # print(SM_TICK, end="") + self.text += self.SM_TICK + else: + for _ in range(num_blocks): + # sys.stdout.write(TICK) + # print(TICK, end="") + self.text += self.TICK + + for _ in range(max([num_blocks,1]), self.tg_width): + self.text += ' ' + + def chart(self, data, labels): + # One category/Multiple series graph with same scale + # All-together normalization + self.text="" + self.getScale(data) + normal_dat = self.normalize(data, self.tg_width) + for row in self.horiz_rows(labels, data, normal_dat): + self.print_row(*row) + self.printAxis() + + return self.text + + +####################################################################### +# Finish termgraph +####################################################################### + + +def main(): + g = TermGraph(suffix='Hz') + data = [-100, 500, 0, -111, 222.324324] + labels = ['foo', 'bar', 'banana', 'monkey', 'fish'] + print(g.chart(data, labels)) + + +#Small test application + +if __name__ == '__main__': + main() + diff --git a/checker/pv/CMakeLists.txt b/checker/pv/CMakeLists.txt index aa3ebbc80405175f817ca201d4b3ff015ab12521..5f11a9eb4d8c8e8a89d31b82f8ebb6089caac285 100644 --- a/checker/pv/CMakeLists.txt +++ b/checker/pv/CMakeLists.txt @@ -11,7 +11,7 @@ include_directories(${ROOT_INCLUDE_DIRS}) file(GLOB pv_checker_sources "src/*cpp") -add_library(PVChecking SHARED +add_library(PVChecking STATIC ${pv_checker_sources} ) diff --git a/checker/pv/include/PrimaryVertexChecker.h b/checker/pv/include/PrimaryVertexChecker.h index 7eecacdced46dd9ef70f33a5f8453c53ee28a517..c8ffa87edafd030e512cd523e898ecc6e57ce972 100644 --- a/checker/pv/include/PrimaryVertexChecker.h +++ 
b/checker/pv/include/PrimaryVertexChecker.h @@ -65,64 +65,8 @@ public: PV::Vertex* pRECPV; // pointer to REC PV } RecPVInfo; -void match_mc_vertex_by_distance(int ipv, std::vector<RecPVInfo>& rinfo, std::vector<MCPVInfo>& mcpvvec) -{ +void match_mc_vertex_by_distance(int ipv, std::vector<RecPVInfo>& rinfo, std::vector<MCPVInfo>& mcpvvec); - double mindist = 999999.; - int indexmc = -1; +void printRat(std::string mes, int a, int b); - for (int imc = 0; imc < (int) mcpvvec.size(); imc++) { - double dist = fabs(mcpvvec[imc].pMCPV->z - rinfo[ipv].z); - if (dist < mindist) { - mindist = dist; - indexmc = imc; - } - } - if (indexmc > -1) { - if (mindist < 5.0 * rinfo[ipv].positionSigma.z) { - rinfo[ipv].indexMCPVInfo = indexmc; - mcpvvec[indexmc].indexRecPVInfo = ipv; - mcpvvec[indexmc].number_rec_vtx++; - } - } -} - -void printRat(std::string mes, int a, int b) -{ - - float rat = 0.f; - if (b > 0) rat = 1.0f * a / b; - - // reformat message - unsigned int len = 20; - std::string pmes = mes; - while (pmes.length() < len) { - pmes += " "; - } - pmes += " : "; - - info_cout << pmes << " " << rat << "( " << a << " / " << b << " )" << std::endl; -} - -std::vector<MCPVInfo>::iterator closestMCPV(std::vector<MCPVInfo>& rblemcpv, std::vector<MCPVInfo>::iterator& itmc) -{ - - std::vector<MCPVInfo>::iterator itret = rblemcpv.end(); - double mindist = 999999.; - if (rblemcpv.size() < 2) return itret; - std::vector<MCPVInfo>::iterator it; - for (it = rblemcpv.begin(); it != rblemcpv.end(); it++) { - if (it->pMCPV != itmc->pMCPV) { - double diff_x = it->pMCPV->x - itmc->pMCPV->x; - double diff_y = it->pMCPV->y - itmc->pMCPV->y; - double diff_z = it->pMCPV->z - itmc->pMCPV->z; - double dist = sqrt(diff_x * diff_x + diff_y * diff_y + diff_z * diff_z); - - if (dist < mindist) { - mindist = dist; - itret = it; - } - } - } - return itret; -} +std::vector<MCPVInfo>::iterator closestMCPV(std::vector<MCPVInfo>& rblemcpv, std::vector<MCPVInfo>::iterator& itmc); diff --git a/checker/pv/src/PrimaryVertexChecker.cpp b/checker/pv/src/PrimaryVertexChecker.cpp index 8386f78b17d41103214d69b224b9482bb8fe9c37..cd0a3aa306e2215a8abce77e8aebe9b11cdf50cb 100644 --- a/checker/pv/src/PrimaryVertexChecker.cpp +++ b/checker/pv/src/PrimaryVertexChecker.cpp @@ -540,3 +540,65 @@ void checkPVs( #endif } } + +void match_mc_vertex_by_distance(int ipv, std::vector<RecPVInfo>& rinfo, std::vector<MCPVInfo>& mcpvvec) +{ + + double mindist = 999999.; + int indexmc = -1; + + for (int imc = 0; imc < (int) mcpvvec.size(); imc++) { + double dist = fabs(mcpvvec[imc].pMCPV->z - rinfo[ipv].z); + if (dist < mindist) { + mindist = dist; + indexmc = imc; + } + } + if (indexmc > -1) { + if (mindist < 5.0 * rinfo[ipv].positionSigma.z) { + rinfo[ipv].indexMCPVInfo = indexmc; + mcpvvec[indexmc].indexRecPVInfo = ipv; + mcpvvec[indexmc].number_rec_vtx++; + } + } +} + +void printRat(std::string mes, int a, int b) +{ + + float rat = 0.f; + if (b > 0) rat = 1.0f * a / b; + + // reformat message + unsigned int len = 20; + std::string pmes = mes; + while (pmes.length() < len) { + pmes += " "; + } + pmes += " : "; + + info_cout << pmes << " " << rat << "( " << a << " / " << b << " )" << std::endl; +} + +std::vector<MCPVInfo>::iterator closestMCPV(std::vector<MCPVInfo>& rblemcpv, std::vector<MCPVInfo>::iterator& itmc) +{ + + std::vector<MCPVInfo>::iterator itret = rblemcpv.end(); + double mindist = 999999.; + if (rblemcpv.size() < 2) return itret; + std::vector<MCPVInfo>::iterator it; + for (it = rblemcpv.begin(); it != rblemcpv.end(); it++) { + if (it->pMCPV != 
itmc->pMCPV) {
+      double diff_x = it->pMCPV->x - itmc->pMCPV->x;
+      double diff_y = it->pMCPV->y - itmc->pMCPV->y;
+      double diff_z = it->pMCPV->z - itmc->pMCPV->z;
+      double dist = sqrt(diff_x * diff_x + diff_y * diff_y + diff_z * diff_z);
+
+      if (dist < mindist) {
+        mindist = dist;
+        itret = it;
+      }
+    }
+  }
+  return itret;
+}
diff --git a/x86/velo/clustering/CMakeLists.txt b/x86/velo/clustering/CMakeLists.txt
index f9e09f74a4ff68c70d37eb2c3b2c50ed45f12bd5..99298002ba994349e2578d2d90ad59f21f1dddd9 100644
--- a/x86/velo/clustering/CMakeLists.txt
+++ b/x86/velo/clustering/CMakeLists.txt
@@ -7,4 +7,4 @@ file(GLOB x86_clustering "src/*cpp")
 # to do: why do I need nvcc to compile the x86 code?
 # it depends on clustering functions in cuda/velo/mask_clustering,
 # we should make them __host__ and __device__ functions
-add_library(x86Clustering SHARED ${x86_clustering})
+add_library(x86Clustering STATIC ${x86_clustering})
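
Usage note: a minimal sketch of exercising the throughput publish step by hand, assuming the run_throughput artifacts (output_<device>/output.txt) are present in the working directory and that MATTERMOST_WEBHOOK_URL is a placeholder for an incoming-webhook URL (in CI the -m argument is supplied via the MATTERMOST_KEY variable). The extraction pipeline and the csv_plotter.py invocation mirror the publish_throughput_job_def script above; the sequence name in the title is illustrative.

  # Collect device names and measured throughputs from the run-stage logs
  cat output_*/output.txt | grep --color=none "device" | sed 's/.*:\ //' > devices.txt
  cat output_*/output.txt | grep --color=none "events/s" | awk '{ print $1; }' > throughputs.txt
  paste -d, devices.txt throughputs.txt > devices_throughputs.csv
  # Plot the per-device throughput chart (scaled to kHz) and post it to Mattermost
  python3 checker/plotting/csv_plotter.py -t "Throughputs for DefaultSequence" -u "kHz" -x 70 -s 1e-3 \
    -m "${MATTERMOST_WEBHOOK_URL}" devices_throughputs.csv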