diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
new file mode 100644
index 0000000000000000000000000000000000000000..209fb9abdcb3900c3e43d92228209c7865627553
--- /dev/null
+++ b/.gitlab-ci.yml
@@ -0,0 +1,248 @@
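+# CI pipeline for throughput measurements: a single build job compiles the
+# requested sequence, run jobs benchmark it on each GPU type, and publish jobs
+# post plots to Mattermost and metrics to telegraf.
+# DEVICE_NUMBERS, DEVICE_CAPABILITIES, DEVICE_MEMORY and MATTERMOST_KEY are
+# expected to be provided as CI variables (e.g. in the project settings).
+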
+stages:
+  - build
+  - run
+  - publish
+
+.build_job: &build_job_def
+  only:
+    refs:
+      - master
+      - schedules
+      - web
+
+  stage: build
+  script:
+    - declare -A DEVICE_NUMBERS_MAP=${DEVICE_NUMBERS}
+    - declare -A DEVICE_CAPABILITIES_MAP=${DEVICE_CAPABILITIES}
+    - PREVIOUS_IFS=${IFS}
+    - IFS=':' read -ra JOB_NAME_SPLIT <<< "${CI_JOB_NAME}"
+    - IFS=${PREVIOUS_IFS}
+    - SEQUENCE=${JOB_NAME_SPLIT[0]}
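+    # Embed device code for every GPU architecture present among the CI runners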
+    - BUILD_ARCH_FLAG="-gencode arch=compute_75,code=sm_75 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_52,code=sm_52 -gencode arch=compute_30,code=sm_30"
+    - export PATH=/usr/local/cuda/bin:/cvmfs/lhcb.cern.ch/lib/contrib/CMake/3.12.1/Linux-x86_64/bin/:$PATH
+    - source /cvmfs/lhcb.cern.ch/lib/lcg/releases/gcc/8.2.0/x86_64-centos7/setup.sh
+    - mkdir build
+    - cd build
+    - cmake -DSEQUENCE=${SEQUENCE} -DOVERRIDE_ARCH_FLAG="${BUILD_ARCH_FLAG}" -DCPU_ARCH="ivybridge" ..
+    - make -j
+  artifacts:
+    name: "$CI_JOB_NAME"
+    expire_in: 2 hrs
+    paths:
+      - build*/*Allen*
+      - input
+  retry: 1
+
+.run_throughput_job: &run_throughput_job_def
+  only:
+    refs:
+      - master
+      - schedules
+      - web
+  stage: run
+  script:
+    - TOPLEVEL=${PWD}
+    - declare -A DEVICE_NUMBERS_MAP=${DEVICE_NUMBERS}
+    - declare -A DEVICE_CAPABILITIES_MAP=${DEVICE_CAPABILITIES}
+    - declare -A DEVICE_MEMORY_MAP=${DEVICE_MEMORY}
+    - PREVIOUS_IFS=${IFS}
+    - IFS=':' read -ra JOB_NAME_SPLIT <<< "${CI_JOB_NAME}"
+    - IFS=${PREVIOUS_IFS}
+    - DEVICE_ID=${JOB_NAME_SPLIT[0]}
+    - SEQUENCE=${JOB_NAME_SPLIT[1]}
+    - D_NUMBER=${DEVICE_NUMBERS_MAP[${DEVICE_ID}]}
+    - D_CAPABILITY=${DEVICE_CAPABILITIES_MAP[${DEVICE_ID}]}
+    - D_MEMORY=${DEVICE_MEMORY_MAP[${DEVICE_ID}]}
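+    # Default benchmark options; devices flagged as LOW in DEVICE_MEMORY run
+    # with fewer threads and a reduced memory reservation (-m 700)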
+    - RUN_OPTIONS="-n 1000 -r 100 -t 8 -c 0"
+    - if [ "${D_MEMORY}" = "LOW" ]; then
+    -   RUN_OPTIONS="-n 1000 -r 100 -t 2 -m 700 -c 0"
+    - fi
+    - export PATH=/usr/local/cuda/bin:/cvmfs/lhcb.cern.ch/lib/contrib/CMake/3.12.1/Linux-x86_64/bin/:$PATH
+    - source /cvmfs/lhcb.cern.ch/lib/lcg/releases/gcc/8.2.0/x86_64-centos7/setup.sh
+    - mkdir output_${DEVICE_ID}
+    - cd build
+    - ls
+    - export LD_LIBRARY_PATH=${PWD}:$LD_LIBRARY_PATH
+    - CUDA_VISIBLE_DEVICES=${D_NUMBER} ./Allen -f /localprojects/shared/1kevents_minbias_UT_LHCbID_fix ${RUN_OPTIONS} 2>&1 | tee ../output_${DEVICE_ID}/output.txt
+    - CUDA_VISIBLE_DEVICES=${D_NUMBER} nvprof ./Allen -f /localprojects/shared/1kevents_minbias_UT_LHCbID_fix ${RUN_OPTIONS} 2>&1 | tee ../output_${DEVICE_ID}/profiler_output.txt
+    - python3 ${TOPLEVEL}/checker/plotting/extract_algo_breakdown.py -d ${TOPLEVEL}
+  artifacts:
+    name: "$CI_JOB_NAME"
+    expire_in: 2 hrs
+    paths:
+      - output_*
+  allow_failure: true
+
+.throughput_cli_plot_job: &publish_algo_breakdown_plot_def
+  only:
+    refs:
+      - master
+      - schedules
+      - web
+  stage: publish
+  script:
+    - declare -A DEVICE_NUMBERS_MAP=${DEVICE_NUMBERS}
+    - declare -A DEVICE_CAPABILITIES_MAP=${DEVICE_CAPABILITIES}
+    - PREVIOUS_IFS=${IFS}
+    - IFS=':' read -ra JOB_NAME_SPLIT <<< "${CI_JOB_NAME}"
+    - IFS=${PREVIOUS_IFS}
+    - DEVICE_ID=${JOB_NAME_SPLIT[0]}
+    - SEQUENCE=${JOB_NAME_SPLIT[1]}
+    - D_NUMBER=${DEVICE_NUMBERS_MAP[${DEVICE_ID}]}
+    - D_CAPABILITY=${DEVICE_CAPABILITIES_MAP[${DEVICE_ID}]}
+    - export PATH=/usr/local/cuda/bin:/cvmfs/lhcb.cern.ch/lib/contrib/CMake/3.12.1/Linux-x86_64/bin/:$PATH
+    - source /cvmfs/lhcb.cern.ch/lib/lcg/releases/gcc/8.2.0/x86_64-centos7/setup.sh
+    - python3 checker/plotting/csv_plotter.py -t "Algorithm Breakdown for ${SEQUENCE}" -u "%" -x 40 -m ${MATTERMOST_KEY} output_${DEVICE_ID}/algo_breakdown.csv
+    - python3 checker/plotting/csv_plotter.py -t "Algorithm Groups for ${SEQUENCE}" -u "%" -m ${MATTERMOST_KEY} output_${DEVICE_ID}/algo_summary.csv
+
+.throughput_throughput_job: &publish_throughput_job_def
+  only:
+    refs:
+      - master
+      - schedules
+      - web
+  stage: publish
+  script:
+    - PREVIOUS_IFS=${IFS}
+    - IFS=':' read -ra JOB_NAME_SPLIT <<< "${CI_JOB_NAME}"
+    - IFS=${PREVIOUS_IFS}
+    - SEQUENCE=${JOB_NAME_SPLIT[1]}
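+    # Collect "<device>,<throughput>" pairs from every run job's output into a CSV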
+    - cat output_*/output.txt | grep --color=none "device" | sed 's/.*:\ //' > devices.txt
+    - cat output_*/output.txt | grep --color=none "events/s" | awk '{ print $1; }' > throughputs.txt
+    - cat devices.txt
+    - cat throughputs.txt
+    - paste -d, devices.txt throughputs.txt > devices_throughputs.csv
+    - cat devices_throughputs.csv
+    - python3 checker/plotting/csv_plotter.py -t "Throughputs for ${SEQUENCE}" -u "kHz" -x 70 -s 1e-3 -m ${MATTERMOST_KEY} devices_throughputs.csv
+    - python3 checker/plotting/post_telegraf.py -d . -s ${SEQUENCE} -b ${CI_COMMIT_REF_NAME}
+
+
+.throughput_speedup_job: &publish_speedup_job_def
+  only:
+    refs:
+      - master
+      - schedules
+      - web
+  stage: publish
+  script:
+    - cat output_*/output.txt | grep --color=none "device" | sed 's/.*:\ //' > devices.txt
+    - cat output_*/output.txt | grep --color=none "events/s" | awk '{ print $1; }' > throughputs.txt
+    - cat devices.txt 
+    - cat throughputs.txt
+    - paste -d, devices.txt throughputs.txt > devices_throughputs.csv
+    - cat devices_throughputs.csv
+    - python3 checker/plotting/csv_plotter.py -n -t "Speedup across GPUs" -u "x" -x 30 -m ${MATTERMOST_KEY} devices_throughputs.csv
+
+# Build the sequence once; all run jobs reuse its artifacts
+LookingForwardKalman:build:
+  <<: *build_job_def
+  tags:
+    - t4
+
+# Run on all platforms
+rtx2080ti:LookingForwardKalman:run_throughput:
+  <<: *run_throughput_job_def
+  tags:
+    - rtx2080ti
+  dependencies:
+    - LookingForwardKalman:build
+
+v100:LookingForwardKalman:run_throughput:
+  <<: *run_throughput_job_def
+  tags:
+    - v100
+  dependencies:
+    - LookingForwardKalman:build
+
+t4:LookingForwardKalman:run_throughput:
+  <<: *run_throughput_job_def
+  tags:
+    - t4
+  dependencies:
+    - LookingForwardKalman:build
+
+gtx10606g:LookingForwardKalman:run_throughput:
+  <<: *run_throughput_job_def
+  tags:
+    - gtx10606g
+  dependencies:
+    - LookingForwardKalman:build
+
+gtx980:LookingForwardKalman:run_throughput:
+  <<: *run_throughput_job_def
+  tags:
+    - gtx980
+  dependencies:
+    - LookingForwardKalman:build
+
+gtx680:LookingForwardKalman:run_throughput:
+  <<: *run_throughput_job_def
+  tags:
+    - gtx680
+  dependencies:
+    - LookingForwardKalman:build
+
+gtxtitanx:LookingForwardKalman:run_throughput:
+  <<: *run_throughput_job_def
+  tags:
+    - gtxtitanx
+  dependencies:
+    - LookingForwardKalman:build
+
+gtx670:LookingForwardKalman:run_throughput:
+  <<: *run_throughput_job_def
+  tags:
+    - gtx670
+  dependencies:
+    - LookingForwardKalman:build
+
+gtx1080ti:LookingForwardKalman:run_throughput:
+  <<: *run_throughput_job_def
+  tags:
+    - gtx1080ti
+  dependencies:
+    - LookingForwardKalman:build
+
+# Publish
+# Plain results to mattermost
+
+# TODO: Make an intermediate stage for this job, modify script to
+#       support the nvprof output as input
+rtx2080ti:LookingForwardKalman:publish_algo_breakdown_plot:
+  <<: *publish_algo_breakdown_plot_def
+  tags:
+    - gpu
+  dependencies:
+    - rtx2080ti:LookingForwardKalman:run_throughput
+
+throughput:LookingForwardKalman:publish_throughput:
+  <<: *publish_throughput_job_def
+  tags:
+    - gpu
+  dependencies:
+    - rtx2080ti:LookingForwardKalman:run_throughput
+    - v100:LookingForwardKalman:run_throughput
+    - t4:LookingForwardKalman:run_throughput
+    - gtx10606g:LookingForwardKalman:run_throughput
+    - gtx680:LookingForwardKalman:run_throughput
+    - gtxtitanx:LookingForwardKalman:run_throughput
+    - gtx670:LookingForwardKalman:run_throughput
+    - gtx1080ti:LookingForwardKalman:run_throughput
+    - gtx980:LookingForwardKalman:run_throughput
+
+# The "gpu" tag is to require python3 essentially
+speedup:LookingForwardKalman:publish_speedup:
+  <<: *publish_speedup_job_def
+  tags:
+    - gpu
+  dependencies:
+    - rtx2080ti:LookingForwardKalman:run_throughput
+    - v100:LookingForwardKalman:run_throughput
+    - t4:LookingForwardKalman:run_throughput
+    - gtx10606g:LookingForwardKalman:run_throughput
+    - gtx680:LookingForwardKalman:run_throughput
+    - gtx980:LookingForwardKalman:run_throughput
+    - gtxtitanx:LookingForwardKalman:run_throughput
+    - gtx670:LookingForwardKalman:run_throughput
+    - gtx1080ti:LookingForwardKalman:run_throughput
diff --git a/CMakeLists.txt b/CMakeLists.txt
index ab5940b3ea100f8b94fe391ff1e65c3ed87efa34..557106b8a2708038e4f39fd98b6ce0a387b2245e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -31,8 +31,13 @@ if(NOT SEQUENCE)
   set(SEQUENCE DefaultSequence)
 endif(NOT SEQUENCE)
 
+if(NOT DEFINED CPU_ARCH)
+  set(CPU_ARCH native)
+endif()
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=${CPU_ARCH}")
+message(STATUS "CPU_ARCH: ${CPU_ARCH}")
+
 set(CMAKE_CXX_STANDARD 17)
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native")
 set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG")
 set(CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG")
 set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g -DDEBUG")
@@ -40,7 +45,9 @@ set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -lpthread")
 set(CMAKE_POSITION_INDEPENDENT_CODE ON)
 
 # Configuration of CUDA compute architecture
-set(CUDA_ARCH "MAX" CACHE STRING "Cuda architecture")
+if(NOT DEFINED CUDA_ARCH)
+  set(CUDA_ARCH "MAX" CACHE STRING "Cuda architecture")
+endif()
 
 if (CUDA_ARCH STREQUAL "MIN" OR CUDA_ARCH STREQUAL "MAX" OR CUDA_ARCH STREQUAL "COMP")
   set(OUTPUTFILE ${PROJECT_BINARY_DIR}/cuda_arch) # No suffix required
@@ -69,6 +76,9 @@ else()
   message(STATUS "Cuda architecture manually set to ${CUDA_ARCH}")
   set(ARCH_FLAG "-arch=${CUDA_ARCH}")
 endif()
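+# Allow the caller (e.g. the CI build job) to bypass architecture detection
+# and pass an explicit set of -gencode flags instead.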
+if(DEFINED OVERRIDE_ARCH_FLAG)
+  set(ARCH_FLAG ${OVERRIDE_ARCH_FLAG})
+endif()
 
 find_package(ZLIB REQUIRED)
 
diff --git a/checker/plotting/csv_plotter.py b/checker/plotting/csv_plotter.py
new file mode 100755
index 0000000000000000000000000000000000000000..f2fec37628ecf2846b428cec7c994406ad15947e
--- /dev/null
+++ b/checker/plotting/csv_plotter.py
@@ -0,0 +1,111 @@
+#!/usr/bin/python3
+import csv
+import subprocess
+import traceback
+from optparse import OptionParser
+from termgraph import TermGraph
+
+
+def format_text(title, plot_data, options):
+    # Prepare data
+    final_vals = []
+    final_tags = []
+
+    keylist = sorted(plot_data.keys(),
+                     key=lambda x: plot_data[x],
+                     reverse=True)
+    for k in keylist:
+        val = plot_data[k]
+        final_tags.append(k)
+        final_vals.append(val)
+
+    # Plot
+    print(final_tags)
+    print(final_vals)
+    tg = TermGraph(suffix=options.unit, x_max=options.x_max)
+    output = tg.chart(final_vals, final_tags)
+
+    text = '{"text": "%s\n```\n%s```"}' % (title, output)
+    return text
+
+
+def send_to_mattermost(text, mattermost_url):
+    subprocess.call([
+        "curl", "-i", "-X", "POST", "-H", 'Content-Type: application/json',
+        "-d", text, mattermost_url
+    ])
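+
+# Expected input: a two-column CSV of "<label>,<value>" rows, e.g. the
+# algo_breakdown.csv / algo_summary.csv files written by extract_algo_breakdown.py
+# or the devices_throughputs.csv assembled in the CI publish jobs.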
+
+
+"""
+Produces a plot of the performance breakdown of the sequence under execution
+"""
+
+
+def main():
+    usage = '%prog [options] <data_file.csv>\n' + \
+            'Example: %prog -m "http://{your-mattermost-site}/hooks/xxx-generatedkey-xxx" data.csv'
+    parser = OptionParser(usage=usage)
+    parser.add_option(
+        '-m',
+        '--mattermost_url',
+        dest='mattermost_url',
+        help='The Mattermost webhook URL to post the generated output to')
+    parser.add_option(
+        '-u',
+        '--unit',
+        dest='unit',
+        default='',
+        help='A unit suffix to append to every value. Default is an empty string'
+    )
+    parser.add_option(
+        '-x',
+        '--x_max',
+        dest='x_max',
+        default=50,
+        type=float,
+        help='Graph X axis is at least this many units wide. (default=50)')
+    parser.add_option('-t',
+                      '--title',
+                      dest='title',
+                      default='',
+                      help='Title for your graph. (default: empty string)')
+    parser.add_option(
+        '-s',
+        '--scale',
+        dest='scale',
+        default=1.0,
+        type=float,
+        help='Multiply all data values by this number (default=1.0)')
+    parser.add_option(
+        '-n',
+        '--normalize',
+        dest='normalize',
+        action='store_true',
+        default=False,
+        help='Scale numbers according to lowest value (default: False)')
+
+    (options, args) = parser.parse_args()
+
+    plot_data = {}
+    with open(args[0]) as csvfile:
+        csv_reader = csv.reader(csvfile, delimiter=',')
+        for row in csv_reader:
+            try:
+                plot_data[row[0]] = float(row[1]) * options.scale
+            except:
+                print(traceback.format_exc())
+
+    # Convert throughputs to speedups
+    if options.normalize:
+        norm = min(plot_data.values())
+        for k in plot_data.keys():
+            plot_data[k] /= norm
+
+    text = format_text(options.title, plot_data, options)
+    print(text)
+    if options.mattermost_url is not None:
+        send_to_mattermost(text, options.mattermost_url)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/checker/plotting/extract_algo_breakdown.py b/checker/plotting/extract_algo_breakdown.py
new file mode 100755
index 0000000000000000000000000000000000000000..1ebaab9519ec5781fce6bad2f04b1fed48c8f042
--- /dev/null
+++ b/checker/plotting/extract_algo_breakdown.py
@@ -0,0 +1,107 @@
+#!/usr/bin/python3
+import sys
+import os
+import re
+import traceback
+import operator
+import csv
+from group_algos import group_algos
+from optparse import OptionParser
+
+"""
+Produces a plot of the performance breakdown of the sequence under execution
+"""
+def main(argv):
+    global final_msg
+    parser = OptionParser()
+    parser.add_option('-d', '--dir', dest='output_directory', help='The directory to scan for output_* directories')
+    parser.add_option('-f', '--file_pattern', dest='file_pattern', default='profiler_output.txt',
+                      help='The file name to look for profiler data in each output_* directory. default: profiler_output.txt')
+
+    (options, args) = parser.parse_args()
+
+    if options.output_directory is None:
+        parser.print_help()
+        print('Please specify an input directory')
+        return
+
+    try:
+        files = os.listdir(options.output_directory)
+    except:
+        print('Failed to read profiler output directory: %s' % options.output_directory)
+        traceback.print_exc()
+        return
+
+    dirs = []
+    for file in files:
+        if file.startswith('output_'):
+            dirs.append(file)
+
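+    # Each output_<device> directory produced by a run job holds one profiler
+    # output file to parse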
+    for dir in dirs:
+        filepath = options.output_directory +"/" + dir + "/" + options.file_pattern
+        try:
+            f = open(filepath)
+            s = f.read()
+            f.close()
+        except:
+            print('Error while trying to read profiler file: %s' % filepath)
+            traceback.print_exc()
+            continue
+
+        # Fetch all timings into timings[0]
+        start_s = "GPU activities:"
+        end_s = "API calls:"
+        timings = re.findall(start_s + "(.*?)" + end_s, s, re.DOTALL)
+        try:
+            perf = re.findall("([0-9]+\.[0-9]+) events\/s", s)[0]
+            perf = float(perf)
+        except:
+            print('Failed to read performance data from output')
+            print(traceback.format_exc())
+
+        try:
+            runtime = re.findall("Ran test for ([0-9]+\.[0-9]+) seconds", s)[0]
+            runtime = float(runtime)
+        except:
+            print('Failed to read runtime from output')
+            print(traceback.format_exc())
+
+
+        # Regexp for one line
+        # Note: An algorithm line looks like:
+        #  11.08%  6.47377s       700  9.2482ms  3.7639ms  15.887ms  lf_search_uv_windows(unsigned int const *, unsigned int const *, int const *, SciFi::TrackHits const *, int const *, char const *, LookingForward::Constants const *, float const *, MiniState const *, short*)
+        # Note: Intended behaviour: Does *not* match nvidia calls like:
+        #  0.04%  20.484ms      9100  2.2500us     832ns  16.255us  [CUDA memcpy DtoH]
+        regexp_expression = ".*?([0-9]+\.[0-9]+)\%.*[um]s  ([a-zA-Z][a-zA-Z\_0-9]+).*"
+
+        algorithm_times = {}
+
+        for line in timings[0].split("\n"):
+            m = re.match(regexp_expression, line)
+            if m:
+                algorithm_times[m.group(2)] = float(m.group(1))
+
+        output_list = sorted(algorithm_times.items(), key=operator.itemgetter(1), reverse=True)
+
+        print(output_list)
+
+        output_path = options.output_directory +"/" + dir + "/algo_breakdown.csv"
+        with open(output_path, 'w') as out:
+            csv_out = csv.writer(out)
+            for row in output_list:
+                csv_out.writerow(row)
+
+        timings = group_algos(algorithm_times)
+        print(timings)
+
+        output_path = options.output_directory +"/" + dir + "/algo_summary.csv"
+        with open(output_path, 'w') as out:
+            csv_out = csv.writer(out)
+            for row in timings:
+                csv_out.writerow(row)
+
+if __name__ == "__main__":
+    main(sys.argv[1:])
+
diff --git a/checker/plotting/group_algos.py b/checker/plotting/group_algos.py
new file mode 100644
index 0000000000000000000000000000000000000000..80ed6e957919b831ff6ceed37477343eef559530
--- /dev/null
+++ b/checker/plotting/group_algos.py
@@ -0,0 +1,36 @@
+#!/usr/bin/python3
+
+import operator
+
+def group_algos(algorithm_times):
+    # Algorithms of each sequence
+    velo_algorithms = ["consolidate_velo_tracks", "copy_velo_track_hit_number", "estimate_input_size", "masked_velo_clustering", "calculate_phi_and_sort", "search_by_triplet", "fill_candidates", "weak_tracks_adder", "copy_and_prefix_sum_single_block"]
+    pv_algorithms = ["pv_beamline_peak", "pv_beamline_multi_fitter", "pv_beamline_histo", "pv_beamline_extrapolate"]
+    ut_algorithms = ["consolidate_ut_tracks", "copy_ut_track_hit_number", "ut_decode_raw_banks_in_order", "ut_pre_decode", "ut_find_permutation", "ut_calculate_number_of_hits", "compass_ut", "ut_search_windows"]
+    scifi_algorithms = ["scifi_pre_decode_v4", "scifi_raw_bank_decoder_v4", "scifi_calculate_cluster_count_v4", "scifi_direct_decoder_v4", "consolidate_scifi_tracks", "copy_scifi_track_hit_number", \
+        "lf_search_initial_windows", "lf_collect_candidates", "lf_prefix_sum_candidates", "lf_triplet_seeding", "lf_triplet_keep_best", "lf_extend_tracks_x",  \
+        "lf_quality_filter_x", "lf_search_uv_windows", "lf_extend_tracks_uv", "lf_quality_filter_length", "lf_fit", "lf_quality_filter"]
+    kalman_algorithms = ["velo_filter", "velo_kalman_fit"]
+    # Order of labels
+    labels_order = ["Velo", "PV", "UT", "SciFi", "Kalman", "Common"]
+    timings = {"Velo": {"algorithms": velo_algorithms, "value": 0},
+        "PV": {"algorithms": pv_algorithms, "value": 0},
+        "UT": {"algorithms": ut_algorithms, "value": 0},
+        "SciFi": {"algorithms": scifi_algorithms, "value": 0},
+        "Kalman": {"algorithms": kalman_algorithms, "value": 0},
+        "Common": {"algorithms": [], "value": 0},
+    }
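+    # Express each algorithm's time as a percentage of the summed nvprof times;
+    # algorithms not listed in any group above are attributed to "Common".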
+    full_addition = sum(algorithm_times.values())
+    for algo, value in algorithm_times.items():
+        found = False
+        for key, algorithm_timing in timings.items():
+            algorithms = algorithm_timing["algorithms"]
+            if algo in algorithms:
+                timings[key]["value"] += 100 * value / full_addition
+                found = True
+                break
+        if not found:
+            timings["Common"]["value"] += 100 * value / full_addition
+    simple_timings = {k:v["value"] for k,v in timings.items()}
+    output_list = sorted(simple_timings.items(), key=operator.itemgetter(1), reverse=True)
+    return output_list
diff --git a/checker/plotting/performance_breakdown.py b/checker/plotting/performance_breakdown.py
index f92c5d8c6f556d25e30beeb3da9df664c145e784..9e3b1b675baa6eb16d60d959fd2a793baa2ddb2b 100755
--- a/checker/plotting/performance_breakdown.py
+++ b/checker/plotting/performance_breakdown.py
@@ -74,7 +74,7 @@ def main(argv):
   pv_algorithms = ["pv_beamline_peak", "pv_beamline_multi_fitter", "pv_beamline_histo", "pv_beamline_extrapolate"]
   ut_algorithms = ["consolidate_ut_tracks", "copy_ut_track_hit_number", "ut_decode_raw_banks_in_order", "ut_pre_decode", "ut_find_permutation", "ut_calculate_number_of_hits", "compass_ut", "ut_search_windows"]
   scifi_algorithms = ["scifi_pre_decode_v4", "scifi_raw_bank_decoder_v4", "scifi_calculate_cluster_count_v4", "scifi_direct_decoder_v4", "consolidate_scifi_tracks", "copy_scifi_track_hit_number"]
-  kalman_algorithms = ["KalmanFilter"]
+  kalman_algorithms = ["velo_kalman", "velo_filter"]
 
   # Convert values to percentages
   full_addition = sum(algorithm_times.values())
diff --git a/checker/plotting/post_telegraf.py b/checker/plotting/post_telegraf.py
new file mode 100755
index 0000000000000000000000000000000000000000..e963c156efec705dddff044a77d73c52335d366b
--- /dev/null
+++ b/checker/plotting/post_telegraf.py
@@ -0,0 +1,153 @@
+#!/usr/bin/python3
+import sys
+import os
+import re
+import traceback
+import operator
+import csv
+import requests
+import time
+from optparse import OptionParser
+from group_algos import group_algos
+
+
+def send_to_telegraf(performance, runtime, timings, device, options):
+    session = requests.session()
+    session.trust_env = False
+    now = time.time()
+    timestamp = int(now) * 1000000000
+
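+    # Build an InfluxDB line-protocol record ("measurement,tags fields timestamp",
+    # timestamp in nanoseconds) and post it to the telegraf HTTP listener.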
+    telegraf_string = "AllenCIPerformance,branch=%s,device=%s,sequence=%s " % (options.branch, device, options.sequence)
+    for label, timing in timings:
+        print(label, timing)
+        telegraf_string += '{}={:.2f},'.format(label, timing)
+
+    telegraf_string += "performance=%.2f,runtime=%.2f" % (performance, runtime)
+    telegraf_string += " %d" % timestamp
+
+    try:
+        print('Sending telegraf string: %s' % telegraf_string)
+        response = session.post(options.telegraf_url, data=telegraf_string)
+        #print('http response: %s' % response.headers)
+    except:
+        print('Failed to submit data string %s' % telegraf_string)
+        print(traceback.format_exc())
+
+
+"""
+Produces a plot of the performance breakdown of the sequence under execution
+"""
+def main(argv):
+    global final_msg
+    parser = OptionParser()
+    parser.add_option('-d', '--dir', dest='output_directory', help='The directory to scan for output_* directories')
+    parser.add_option('-f', '--file_pattern', dest='file_pattern', default='profiler_output.txt',
+                      help='The file name to look for profiler data in each output_* directory. default: profiler_output.txt')
+    parser.add_option('-b', '--branch', dest='branch', default = 'UNKNOWN', help='branch tag to be forwarded to telegraf/grafana')
+    parser.add_option('-s', '--sequence', dest='sequence', default = 'UNKNOWN', help='sequence name tag to be forwarded to telegraf/grafana')
+    parser.add_option('-t', '--telegraf_url', dest='telegraf_url', default = 'http://localhost:8186/telegraf', help='URL to send telegraf output to')
+
+
+    (options, args) = parser.parse_args()
+
+    if options.output_directory is None:
+        parser.print_help()
+        print('Please specify an input directory')
+        return
+
+    try:
+        files = os.listdir(options.output_directory)
+    except:
+        print('Failed to read profiler output directory: %s' % options.output_directory)
+        traceback.print_exc()
+        return
+
+    dirs = []
+    for file in files:
+        if file.startswith('output_'):
+            dirs.append(file)
+
+    for d in dirs:
+        filepath = options.output_directory +"/" + d + "/" + options.file_pattern
+        try:
+            f = open(filepath)
+            s = f.read()
+            f.close()
+        except:
+            print('Error while trying to read profiler file: %s' % filepath)
+            traceback.print_exc()
+            continue
+
+        try:
+            device = d.split('_')[1]
+        except:
+            traceback.print_exc()
+            print('Could not extract device name from directory name: %s' % d)
+            continue
+
+        # Fetch all timings into timings[0]
+        start_s = "GPU activities:"
+        end_s = "API calls:"
+        timings = re.findall(start_s + "(.*?)" + end_s, s, re.DOTALL)
+        try:
+            perf = re.findall("([0-9]+\.[0-9]+) events\/s", s)[0]
+            perf = float(perf)
+        except:
+            print('Failed to read performance data from output')
+            print(traceback.format_exc())
+
+        try:
+            runtime = re.findall("Ran test for ([0-9]+\.[0-9]+) seconds", s)[0]
+            runtime = float(runtime)
+        except:
+            print('Failed to read runtime from output')
+            print(traceback.format_exc())
+
+
+        # Regexp for one line
+        # Note: An algorithm line looks like:
+        #  11.08%  6.47377s       700  9.2482ms  3.7639ms  15.887ms  lf_search_uv_windows(unsigned int const *, unsigned int const *, int const *, SciFi::TrackHits const *, int const *, char const *, LookingForward::Constants const *, float const *, MiniState const *, short*)
+        # Note: Intended behaviour: Does *not* match nvidia calls like:
+        #  0.04%  20.484ms      9100  2.2500us     832ns  16.255us  [CUDA memcpy DtoH]
+        regexp_expression = ".*?([0-9]+\.[0-9]+)\%.*[um]s  ([a-zA-Z][a-zA-Z\_0-9]+).*"
+
+        algorithm_times = {}
+
+        for line in timings[0].split("\n"):
+            m = re.match(regexp_expression, line)
+            if m:
+                algorithm_times[m.group(2)] = float(m.group(1))
+
+        output_list = sorted(algorithm_times.items(), key=operator.itemgetter(1), reverse=True)
+        print('Algorithm Times:')
+        print(output_list)
+
+        print('Algorithm Group Times:')
+        timings = group_algos(algorithm_times)
+
+        print(timings)
+
+        print('Throughput: %.2f' % (perf))
+        print('Runtime:    %.2f' % (runtime))
+
+        send_to_telegraf(perf, runtime, timings, device, options)
+
+if __name__ == "__main__":
+    main(sys.argv[1:])
+
diff --git a/checker/plotting/speedup_cli_plot.py b/checker/plotting/speedup_cli_plot.py
new file mode 100755
index 0000000000000000000000000000000000000000..0a2314f0b01917354aba5d23e58cb4794fa80412
--- /dev/null
+++ b/checker/plotting/speedup_cli_plot.py
@@ -0,0 +1,230 @@
+#!/usr/bin/python3
+import csv
+import subprocess
+import os
+import re
+
+#######################################################################
+# From termgraph
+# https://github.com/mkaz/termgraph/
+#######################################################################
+
+tg_width = 50
+tg_format = '{:<4.2f}'
+DELIM = ','
+TICK = '▇'
+SM_TICK = '▏'
+
+
+def find_max_label_length(labels):
+    """Return the maximum length for the labels."""
+    length = 0
+    for i in range(len(labels)):
+        if len(labels[i]) > length:
+            length = len(labels[i])
+
+    return length
+
+
+def normalize(data, width):
+    """Normalize the data and return it."""
+    # min_dat = find_min(data)
+    min_dat = data[-1]
+    # We offset by the minimum if there's a negative.
+    off_data = []
+    if min_dat < 0:
+        min_dat = abs(min_dat)
+        for dat in data:
+            off_data.append([_d + min_dat for _d in dat])
+    else:
+        off_data = data
+    # min_dat = find_min(off_data)
+    # max_dat = find_max(off_data)
+    min_dat = off_data[-1]
+    max_dat = off_data[0]
+
+    if max_dat < width:
+        # Don't need to normalize if the max value
+        # is less than the width we allow.
+        return off_data
+
+    # max_dat / width is the value for a single tick. norm_factor is the
+    # inverse of this value
+    # If you divide a number to the value of single tick, you will find how
+    # many ticks it does contain basically.
+    norm_factor = width / float(max_dat)
+    normal_dat = []
+    for dat in off_data:
+        normal_dat.append([_v * norm_factor for _v in dat])
+
+    return normal_dat
+
+
+def horiz_rows(labels, data, normal_dat):
+    global final_msg
+    """Prepare the horizontal graph.
+     Each row is printed through the print_row function."""
+    # val_min = find_min(data)
+    val_min = data[-1]
+
+    for i in range(len(labels)):
+        label = "{:<{x}}: ".format(labels[i], x=find_max_label_length(labels))
+
+        values = data[i]
+        num_blocks = normal_dat[i]
+
+        for j in range(1):
+            # In Multiple series graph 1st category has label at the beginning,
+            # whereas the rest categories have only spaces.
+            if j > 0:
+                len_label = len(label)
+                label = ' ' * len_label
+            tail = ' {}'.format(tg_format.format(values))
+            color = None
+            # print(label, end="")
+            final_msg += label
+            yield (values, int(num_blocks), val_min, color)
+            final_msg += tail + 'x\n'
+
+
+# Prints a row of the horizontal graph.
+def print_row(value, num_blocks, val_min, colors):
+    global final_msg
+    """A method to print a row for a horizontal graphs.
+
+  i.e:
+  1: ▇▇ 2
+  2: ▇▇▇ 3
+  3: ▇▇▇▇ 4
+  """
+
+    if num_blocks < 1 and (value > val_min or value > 0):
+        # Print something if it's not the smallest
+        # and the normal value is less than one.
+        # sys.stdout.write(SM_TICK)
+        # print(SM_TICK, end="")
+        final_msg += SM_TICK
+    else:
+        for _ in range(num_blocks):
+            # sys.stdout.write(TICK)
+            # print(TICK, end="")
+            final_msg += TICK
+
+
+def chart(data, labels):
+    # One category/Multiple series graph with same scale
+    # All-together normalization
+    normal_dat = normalize(data, tg_width)
+    for row in horiz_rows(labels, data, normal_dat):
+        print_row(*row)
+
+
+#######################################################################
+# Finish termgraph
+#######################################################################
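+# Note: the module-level termgraph helpers above are not used below; the
+# plotting goes through the TermGraph class imported from termgraph.py.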
+
+import traceback
+from optparse import OptionParser
+from termgraph import TermGraph
+
+
+def format_text(title, algorithm_times, options):
+    # Prepare data
+    final_vals = []
+    final_tags = []
+
+    keylist = sorted(algorithm_times.keys(),
+                     key=lambda x: algorithm_times[x],
+                     reverse=True)
+    for k in keylist:
+        val = algorithm_times[k]
+        final_tags.append(k)
+        final_vals.append(val)
+
+    # Plot
+    print(final_tags)
+    print(final_vals)
+    tg = TermGraph(suffix=options.unit, x_max=options.x_max)
+    final_msg = tg.chart(final_vals, final_tags)
+
+    text = '{"text": "%s\n```\n%s```"}' % (title, final_msg)
+    return text
+
+
+def send_to_mattermost(text, mattermost_url):
+    subprocess.call([
+        "curl", "-i", "-X", "POST", "-H", 'Content-Type: application/json',
+        "-d", text, mattermost_url
+    ])
+
+
+"""
+Produces a plot of the performance breakdown of the sequence under execution
+"""
+
+
+def main():
+    usage = '%prog [options] <-d data_file>\n' + \
+            'Example: %prog -d data.csv -m "http://{your-mattermost-site}/hooks/xxx-generatedkey-xxx"'
+    parser = OptionParser(usage=usage)
+    parser.add_option(
+        '-m',
+        '--mattermost_url',
+        dest='mattermost_url',
+        help='The Mattermost webhook URL to post the generated output to')
+    parser.add_option('-d',
+                      '--data_file',
+                      dest='data_file',
+                      help='Path to a data file to plot')
+    parser.add_option(
+        '-u',
+        '--unit',
+        dest='unit',
+        default='',
+        help='A unit suffix to append to every value. Default is an empty string'
+    )
+    parser.add_option(
+        '-x',
+        '--x_max',
+        dest='x_max',
+        default=50,
+        help='Graph X axis is at least this many units wide. (default=50)')
+    parser.add_option('-t',
+                      '--title',
+                      dest='title',
+                      default='',
+                      help='Title for your graph. (default: empty string)')
+    (options, args) = parser.parse_args()
+
+    if options.data_file is None:
+        parser.print_help()
+        return -1
+
+    try:
+        options.x_max = float(options.x_max)
+    except:
+        parser.print_help()
+        print('\n-x has to be a convertible floating point value!\n')
+        return -1
+
+    algorithm_times = {}
+    with open(options.data_file) as csvfile:
+        csv_reader = csv.reader(csvfile, delimiter=',')
+        for row in csv_reader:
+            try:
+                algorithm_times[row[0]] = float(row[1])
+            except:
+                print(traceback.format_exc())
+
+    # Convert throughputs to speedups
+    base_speed = min(algorithm_times.values())
+    for k in algorithm_times.keys():
+        algorithm_times[k] /= base_speed
+
+    text = format_text(options.title, algorithm_times, options)
+    print(text)
+    if options.mattermost_url is not None:
+        send_to_mattermost(text, options.mattermost_url)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/checker/plotting/speedup_sample.dat b/checker/plotting/speedup_sample.dat
new file mode 100644
index 0000000000000000000000000000000000000000..688ec80bec741af64355e6131b3441f042d2db9d
--- /dev/null
+++ b/checker/plotting/speedup_sample.dat
@@ -0,0 +1,8 @@
+!!SAMPLE!! GTX 1060 6GB,7519.354825
+!!SAMPLE!! GTX 1080 Ti,18982.238479
+!!SAMPLE!! GTX 670,2297.042902
+!!SAMPLE!! GTX 680,2524.368385
+!!SAMPLE!! GTX TITAN X,11117.198792
+!!SAMPLE!! RTX 2080 Ti,43705.972936
+!!SAMPLE!! T4,22077.601157
+!!SAMPLE!! V100-PCIE-32GB,47568.303480
diff --git a/checker/plotting/termgraph.py b/checker/plotting/termgraph.py
new file mode 100755
index 0000000000000000000000000000000000000000..857ade16ee33472b482cdf80224780706e8da167
--- /dev/null
+++ b/checker/plotting/termgraph.py
@@ -0,0 +1,203 @@
+#!/usr/bin/python3
+
+import math
+
+#######################################################################
+# From termgraph
+# https://github.com/mkaz/termgraph/
+#######################################################################
+
+
+class TermGraph:
+    def __init__(self, tg_width=50, tg_format='{:<4.2f}', delim=',', tick='█', sm_tick='▌', suffix="", x_max = 50):
+        self.tg_width = tg_width
+        self.tg_format = tg_format
+        self.DELIM = delim
+        self.TICK = tick
+        self.SM_TICK = sm_tick
+        self.text = ""
+        self.suffix = suffix
+        self.x_max = x_max
+        self.big_tick = "┼"
+        self.small_tick = "┴"
+        self.horiz = "─"
+        self.label_length = 0
+
+    def find_max_label_length(self, labels):
+        """Return the maximum length for the labels."""
+        length = 0
+        for i in range(len(labels)):
+            if len(labels[i]) > length:
+                length = len(labels[i])
+
+        self.label_length = length
+        return length  # also cached in self.label_length for later use
+
+    def getScale(self, data):
+        # min_dat = find_min(data)
+        self.min_dat = min(data)
+        if max(data) < self.x_max:
+            self.max_dat = self.x_max
+        else:
+            self.max_dat = max(data)
+        #epsilon = (maximum - minimum) / 1e6
+        #maximum += epsilon
+        #minimum -= epsilon
+        rr = self.max_dat - self.min_dat
+        stepCount = 10
+        roughStep = rr / (stepCount -1)
+
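+        # Pick a "nice" step (1, 2, 5 or 10 times a power of ten) so that about
+        # stepCount ticks span the data range, then snap the scale limits to it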
+        goodNormalizedSteps = [1, 2, 5, 10]
+        stepPower = math.pow(10, -math.floor(math.log10(abs(roughStep))))
+        normalizedStep = roughStep * stepPower
+        goodNormalizedStep = list(filter(lambda x: x > normalizedStep, goodNormalizedSteps))[0]
+        self.step = int(goodNormalizedStep / stepPower)
+        self.scaleMax = int(math.ceil(self.max_dat / self.step) * self.step)
+        self.scaleMin = int(math.floor(self.min_dat / self.step) * self.step) 
+        self.strlen = max(len(str(int(self.scaleMin))), len(str(int(self.scaleMax))))
+        print(self.strlen)
+        self.nSteps = int((self.scaleMax - self.scaleMin) / self.step)
+        print(self.scaleMin, self.scaleMax, self.step, self.nSteps)
+
+        self.tick_dist = int(self.tg_width / (self.scaleMax - self.scaleMin) * self.step / 2)
+        print(self.tick_dist)
+    
+        self.tg_width = int(self.tick_dist * 2 * self.nSteps)
+        print('Updating tg_width to: %d' % self.tg_width)
+        return
+
+    def numLen(self, num):
+        return len(str(int(num)))
+
+    def printAxis(self):
+        self.text += " " * (self.label_length + 1)
+        self.text += self.big_tick
+
+        for i in range(0, self.nSteps * 2):
+            self.text += self.horiz * int(self.tick_dist - 1)
+            if i % 2 == 0:
+                self.text += self.small_tick
+            else:
+                self.text += self.big_tick
+
+        self.text += "\n"
+        
+        l = self.numLen(self.scaleMin)
+        l = int(l/2)
+        self.text += " " * (self.label_length - l - self.tick_dist + 2) 
+        for i in range(self.scaleMin,  self.scaleMax + self.step, self.step):
+            self.text += '{:^{width}}'.format(str(i), width = '%d' % (self.tick_dist * 2))
+        self.text += "\n"
+
+    def normalize(self, data, width):
+        """Normalize the data and return it."""
+      
+        # We offset by the minimum if there's a negative.
+        off_data = []
+        if self.min_dat < 0:
+            self.min_dat = abs(self.min_dat)
+            for dat in data:
+                off_data.append(self.min_dat + dat)
+        else:
+            off_data = data
+        #self.max_dat += abs(self.min_dat)
+
+        #if self.max_dat < self.x_max:
+            # Don't need to normalize if the max value
+            # is less than the width we allow.
+            #return off_data
+        #    self.max_dat = self.x_max
+
+        # max_dat / width is the value for a single tick. norm_factor is the
+        # inverse of this value
+        # If you divide a number to the value of single tick, you will find how
+        # many ticks it does contain basically.
+        print('width: %d, max_dat: %f' % (width, self.scaleMax))
+        norm_factor = width / float(self.scaleMax)
+        normal_dat = []
+        for dat in off_data:
+            normal_dat.append(dat * norm_factor)
+
+
+        return normal_dat
+
+    def horiz_rows(self, labels, data, normal_dat):
+        """Prepare the horizontal graph.
+           Each row is printed through the print_row function."""
+        val_min = min(data)
+
+        for i in range(len(labels)):
+            label = "{:<{x}} │".format(labels[i], x=self.find_max_label_length(labels))
+
+            values = data[i]
+            num_blocks = normal_dat[i]
+
+            for j in range(1):
+                # In Multiple series graph 1st category has label at the beginning,
+                # whereas the rest categories have only spaces.
+                if j > 0:
+                    len_label = len(label)
+                    label = ' ' * len_label
+                tail = ' {} %s'.format(self.tg_format.format(values)) % self.suffix
+                color = None
+                # print(label, end="")
+                self.text += label
+                yield(values, int(num_blocks), val_min, color)
+                self.text += tail + '\n'
+
+    # Prints a row of the horizontal graph.
+
+    def print_row(self, value, num_blocks, val_min, color):
+        """A method to print a row for a horizontal graphs.
+      
+        i.e:
+        1: ▇▇ 2
+        2: ▇▇▇ 3
+        3: ▇▇▇▇ 4
+        """
+
+        if num_blocks < 1 and (value >= val_min or value > 0):
+            # Print something if it's not the smallest
+            # and the normal value is less than one
+            # sys.stdout.write(SM_TICK)
+            # print(SM_TICK, end="")
+            self.text += self.SM_TICK
+        else:
+            for _ in range(num_blocks):
+                # sys.stdout.write(TICK)
+                # print(TICK, end="")
+                self.text += self.TICK
+
+        for _ in range(max([num_blocks,1]), self.tg_width):
+            self.text += ' '
+
+    def chart(self, data, labels):
+        # One category/Multiple series graph with same scale
+        # All-together normalization
+        self.text=""
+        self.getScale(data)
+        normal_dat = self.normalize(data, self.tg_width)
+        for row in self.horiz_rows(labels, data, normal_dat):
+            self.print_row(*row)
+        self.printAxis()
+
+        return self.text
+
+
+#######################################################################
+# Finish termgraph
+#######################################################################
+
+
+def main():
+    g = TermGraph(suffix='Hz')
+    data = [-100, 500, 0, -111, 222.324324]
+    labels = ['foo', 'bar', 'banana', 'monkey', 'fish']
+    print(g.chart(data, labels))
+
+
+#Small test application
+
+if __name__ == '__main__':
+    main()
+
diff --git a/checker/pv/CMakeLists.txt b/checker/pv/CMakeLists.txt
index aa3ebbc80405175f817ca201d4b3ff015ab12521..5f11a9eb4d8c8e8a89d31b82f8ebb6089caac285 100644
--- a/checker/pv/CMakeLists.txt
+++ b/checker/pv/CMakeLists.txt
@@ -11,7 +11,7 @@ include_directories(${ROOT_INCLUDE_DIRS})
 
 file(GLOB pv_checker_sources "src/*cpp")
 
-add_library(PVChecking SHARED
+add_library(PVChecking STATIC
   ${pv_checker_sources}
 )
 
diff --git a/checker/pv/include/PrimaryVertexChecker.h b/checker/pv/include/PrimaryVertexChecker.h
index 7eecacdced46dd9ef70f33a5f8453c53ee28a517..c8ffa87edafd030e512cd523e898ecc6e57ce972 100644
--- a/checker/pv/include/PrimaryVertexChecker.h
+++ b/checker/pv/include/PrimaryVertexChecker.h
@@ -65,64 +65,8 @@ public:
   PV::Vertex* pRECPV;            // pointer to REC PV
 } RecPVInfo;
 
-void match_mc_vertex_by_distance(int ipv, std::vector<RecPVInfo>& rinfo, std::vector<MCPVInfo>& mcpvvec)
-{
+void match_mc_vertex_by_distance(int ipv, std::vector<RecPVInfo>& rinfo, std::vector<MCPVInfo>& mcpvvec);
 
-  double mindist = 999999.;
-  int indexmc = -1;
+void printRat(std::string mes, int a, int b);
 
-  for (int imc = 0; imc < (int) mcpvvec.size(); imc++) {
-    double dist = fabs(mcpvvec[imc].pMCPV->z - rinfo[ipv].z);
-    if (dist < mindist) {
-      mindist = dist;
-      indexmc = imc;
-    }
-  }
-  if (indexmc > -1) {
-    if (mindist < 5.0 * rinfo[ipv].positionSigma.z) {
-      rinfo[ipv].indexMCPVInfo = indexmc;
-      mcpvvec[indexmc].indexRecPVInfo = ipv;
-      mcpvvec[indexmc].number_rec_vtx++;
-    }
-  }
-}
-
-void printRat(std::string mes, int a, int b)
-{
-
-  float rat = 0.f;
-  if (b > 0) rat = 1.0f * a / b;
-
-  // reformat message
-  unsigned int len = 20;
-  std::string pmes = mes;
-  while (pmes.length() < len) {
-    pmes += " ";
-  }
-  pmes += " : ";
-
-  info_cout << pmes << " " << rat << "( " << a << " / " << b << " )" << std::endl;
-}
-
-std::vector<MCPVInfo>::iterator closestMCPV(std::vector<MCPVInfo>& rblemcpv, std::vector<MCPVInfo>::iterator& itmc)
-{
-
-  std::vector<MCPVInfo>::iterator itret = rblemcpv.end();
-  double mindist = 999999.;
-  if (rblemcpv.size() < 2) return itret;
-  std::vector<MCPVInfo>::iterator it;
-  for (it = rblemcpv.begin(); it != rblemcpv.end(); it++) {
-    if (it->pMCPV != itmc->pMCPV) {
-      double diff_x = it->pMCPV->x - itmc->pMCPV->x;
-      double diff_y = it->pMCPV->y - itmc->pMCPV->y;
-      double diff_z = it->pMCPV->z - itmc->pMCPV->z;
-      double dist = sqrt(diff_x * diff_x + diff_y * diff_y + diff_z * diff_z);
-
-      if (dist < mindist) {
-        mindist = dist;
-        itret = it;
-      }
-    }
-  }
-  return itret;
-}
+std::vector<MCPVInfo>::iterator closestMCPV(std::vector<MCPVInfo>& rblemcpv, std::vector<MCPVInfo>::iterator& itmc);
diff --git a/checker/pv/src/PrimaryVertexChecker.cpp b/checker/pv/src/PrimaryVertexChecker.cpp
index 8386f78b17d41103214d69b224b9482bb8fe9c37..cd0a3aa306e2215a8abce77e8aebe9b11cdf50cb 100644
--- a/checker/pv/src/PrimaryVertexChecker.cpp
+++ b/checker/pv/src/PrimaryVertexChecker.cpp
@@ -540,3 +540,65 @@ void checkPVs(
 #endif
   }
 }
+
+void match_mc_vertex_by_distance(int ipv, std::vector<RecPVInfo>& rinfo, std::vector<MCPVInfo>& mcpvvec)
+{
+
+  double mindist = 999999.;
+  int indexmc = -1;
+
+  for (int imc = 0; imc < (int) mcpvvec.size(); imc++) {
+    double dist = fabs(mcpvvec[imc].pMCPV->z - rinfo[ipv].z);
+    if (dist < mindist) {
+      mindist = dist;
+      indexmc = imc;
+    }
+  }
+  if (indexmc > -1) {
+    if (mindist < 5.0 * rinfo[ipv].positionSigma.z) {
+      rinfo[ipv].indexMCPVInfo = indexmc;
+      mcpvvec[indexmc].indexRecPVInfo = ipv;
+      mcpvvec[indexmc].number_rec_vtx++;
+    }
+  }
+}
+
+void printRat(std::string mes, int a, int b)
+{
+
+  float rat = 0.f;
+  if (b > 0) rat = 1.0f * a / b;
+
+  // reformat message
+  unsigned int len = 20;
+  std::string pmes = mes;
+  while (pmes.length() < len) {
+    pmes += " ";
+  }
+  pmes += " : ";
+
+  info_cout << pmes << " " << rat << "( " << a << " / " << b << " )" << std::endl;
+}
+
+std::vector<MCPVInfo>::iterator closestMCPV(std::vector<MCPVInfo>& rblemcpv, std::vector<MCPVInfo>::iterator& itmc)
+{
+
+  std::vector<MCPVInfo>::iterator itret = rblemcpv.end();
+  double mindist = 999999.;
+  if (rblemcpv.size() < 2) return itret;
+  std::vector<MCPVInfo>::iterator it;
+  for (it = rblemcpv.begin(); it != rblemcpv.end(); it++) {
+    if (it->pMCPV != itmc->pMCPV) {
+      double diff_x = it->pMCPV->x - itmc->pMCPV->x;
+      double diff_y = it->pMCPV->y - itmc->pMCPV->y;
+      double diff_z = it->pMCPV->z - itmc->pMCPV->z;
+      double dist = sqrt(diff_x * diff_x + diff_y * diff_y + diff_z * diff_z);
+
+      if (dist < mindist) {
+        mindist = dist;
+        itret = it;
+      }
+    }
+  }
+  return itret;
+}
diff --git a/x86/velo/clustering/CMakeLists.txt b/x86/velo/clustering/CMakeLists.txt
index f9e09f74a4ff68c70d37eb2c3b2c50ed45f12bd5..99298002ba994349e2578d2d90ad59f21f1dddd9 100644
--- a/x86/velo/clustering/CMakeLists.txt
+++ b/x86/velo/clustering/CMakeLists.txt
@@ -7,4 +7,4 @@ file(GLOB x86_clustering "src/*cpp")
 # to do: why do I need nvcc to compile the x86 code?
 # it depends on clustering functions in cuda/velo/mask_clustering,
 # we should make them __host__ and __device__ functions
-add_library(x86Clustering SHARED ${x86_clustering})
+add_library(x86Clustering STATIC ${x86_clustering})