Performance evaluation of ROOT file compression algorithms and levels
Authored by Yuriy Volkotrub
This Python script re-compresses the CollectionTree of a ROOT file with different compression algorithms and levels, measures the compression time, the resulting file size and compression ratio, and the read speed, and logs the results to a CSV file.
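For context, ROOT encodes a file's compression setting as 100 * algorithm + level; my understanding is that SetCompressionAlgorithm switches the algorithm while keeping the level already set on the file, which is the pattern the script relies on. A minimal sketch (assuming a PyROOT installation; the file name is hypothetical, not part of the snippet):

import ROOT

# Minimal sketch: create a file with compression level 5, then switch the
# algorithm to ZSTD; the level given to the constructor should be kept.
f = ROOT.TFile("compression_demo.root", "RECREATE", "", 5)  # hypothetical file name
f.SetCompressionAlgorithm(ROOT.kZSTD)
print(f.GetCompressionSettings())  # encoded as 100 * algorithm + level
f.Close()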
varCompressAlgs.py 3.76 KiB
import ROOT
import time
import os
# Function to perform compression and measure time
def compress_file(input_file, output_file, compression_algo, compression_level):
    input_f = ROOT.TFile.Open(input_file, "READ")
    tree = input_f.Get("CollectionTree")
    if not tree:
        print(f"Error: CollectionTree not found in {input_file}")
        input_f.Close()
        return None

    # The fourth TFile argument sets the compression level;
    # SetCompressionAlgorithm then selects the algorithm.
    output_f = ROOT.TFile(output_file, "RECREATE", "", compression_level)
    output_f.SetCompressionAlgorithm(compression_algo)

    # Clone the tree structure and re-fill it entry by entry,
    # rewriting the data into the output file
    output_tree = tree.CloneTree(0)
    n_entries = tree.GetEntries()

    start_time = time.time()
    for i in range(n_entries):
        tree.GetEntry(i)
        output_tree.Fill()
    end_time = time.time()
    compression_time = end_time - start_time

    output_f.Write()
    output_f.Close()
    input_f.Close()
    return compression_time
# Function to measure file size
def get_file_size(file_path):
    return os.path.getsize(file_path) / (1024 * 1024)  # Return size in MB
# Function to measure reading speed in MB/s
def measure_reading_speed(input_file, n_events=20000):
    input_f = ROOT.TFile.Open(input_file, "READ")
    tree = input_f.Get("CollectionTree")
    if not tree:
        print(f"Error: CollectionTree not found in {input_file}")
        input_f.Close()
        return float("inf")

    # Measure the start time and total size in MB
    start_time = time.time()
    total_size = get_file_size(input_file)  # Get the file size in MB
    for i in range(min(n_events, tree.GetEntries())):
        tree.GetEntry(i)
    end_time = time.time()
    input_f.Close()

    time_taken = end_time - start_time  # Total time taken
    # Note: the speed uses the full compressed file size, even though
    # n_events caps how many entries are actually read
    reading_speed = total_size / time_taken if time_taken > 0 else 0  # MB/s
    return reading_speed
input_files = [
    "../../../../data24_13p6TeV.00472677.physics_Main.deriv.DAOD_PHYSLITE.f1437_m2243_p6142/DAOD_PHYSLITE.38116464._000027.pool.root.1"
]

compression_algorithms = {
    "lzma": ROOT.kLZMA,
    "lz4": ROOT.kLZ4,
    "zlib": ROOT.kZLIB,
    "zstd": ROOT.kZSTD
}

compression_levels = [1, 5, 9]
# CSV logging
log_file_path = "compression_results.csv"
with open(log_file_path, "w") as log_file:
    log_file.write("InputFile,Algorithm,Level,FileSizeMB,CompressionTimeSec,ReadSpeedMBps,CompressionRatio\n")
# Loop over files, algorithms, and compression levels
for input_file in input_files:
    for algo_name, algo_code in compression_algorithms.items():
        for level in compression_levels:
            output_file = f"compressed_{algo_name}_level{level}.root"

            # Compress the file and measure time
            compression_time = compress_file(input_file, output_file, algo_code, level)
            if compression_time is None:
                continue

            # Measure file size
            file_size = get_file_size(output_file)

            # Measure reading speed
            reading_speed = measure_reading_speed(output_file)

            # Compression ratio: output size relative to input size (smaller is better)
            input_size = get_file_size(input_file)  # Input file size in MB
            compression_ratio = file_size / input_size

            with open(log_file_path, "a") as log_file:
                log_file.write(f"{input_file},{algo_name},{level},{file_size:.2f},{compression_time:.2f},{reading_speed:.2f},{compression_ratio:.4f}\n")

            print(f"File: {output_file}, Size: {file_size:.2f} MB, Compression Time: {compression_time:.2f} sec, Reading Speed: {reading_speed:.2f} MB/s, Compression Ratio: {compression_ratio:.4f}")