Skip to content
Snippets Groups Projects

Performance evaluation of ROOT file compression algorithms and levels

  • Clone with SSH
  • Clone with HTTPS
  • Embed
  • Share
    The snippet can be accessed without any authentication.
    Authored by Yuriy Volkotrub

    This Python script performs file compression, measures compression time, calculates file size, and determines the reading speed for ROOT files using different compression algorithms and levels.

    Edited
    varCompressAlgs.py 3.76 KiB
    import ROOT
    import time
    import os
    
    # Function to perform compression and measure time
    def compress_file(input_file, output_file, compression_algo, compression_level):
        """Copy "CollectionTree" from *input_file* into a freshly compressed file.

        The tree is cloned structurally (no entries) into *output_file*, then
        every entry is read from the input and filled into the clone so that
        the data is re-compressed with the requested settings.

        Args:
            input_file: Path of the ROOT file to read.
            output_file: Path of the ROOT file to (re)create.
            compression_algo: ROOT compression algorithm code (e.g. ROOT.kZSTD).
            compression_level: Compression level passed to the output TFile.

        Returns:
            Elapsed wall-clock seconds for the read/fill loop plus the final
            Write(), or None if the input cannot be opened, the tree is
            missing, or the output file cannot be created.
        """
        input_f = ROOT.TFile.Open(input_file, "READ")
        # TFile.Open signals failure with a null/zombie handle, not an exception.
        if not input_f or input_f.IsZombie():
            print(f"Error: cannot open {input_file}")
            return None

        output_f = None
        try:
            tree = input_f.Get("CollectionTree")
            if not tree:
                print(f"Error: CollectionTree not found in {input_file}")
                return None

            output_f = ROOT.TFile(output_file, "RECREATE", "", compression_level)
            if output_f.IsZombie():
                print(f"Error: cannot create {output_file}")
                return None
            # Must be set before cloning so the new branches pick it up.
            output_f.SetCompressionAlgorithm(compression_algo)

            # Clone only the structure; entries are copied one by one below.
            output_tree = tree.CloneTree(0)

            n_entries = tree.GetEntries()
            # perf_counter is monotonic, unlike time.time, so the interval
            # cannot be distorted by system clock adjustments.
            start_time = time.perf_counter()
            for i in range(n_entries):
                tree.GetEntry(i)
                output_tree.Fill()
            # Write() stays inside the timed region: baskets still buffered in
            # memory are only compressed and flushed to disk at this point.
            output_f.Write()
            return time.perf_counter() - start_time
        finally:
            # Release both files on every exit path, including exceptions.
            if output_f is not None:
                output_f.Close()
            input_f.Close()
    
    # Function to measure file size
    def get_file_size(file_path):
        """Return the on-disk size of *file_path* in MiB (1 MiB = 1024 * 1024 bytes)."""
        size_in_bytes = os.stat(file_path).st_size
        bytes_per_mib = 1024 * 1024
        return size_in_bytes / bytes_per_mib
    
    # Function to measure reading speed in MB/s
    def measure_reading_speed(input_file, n_events=20000):
        """Measure how fast entries of "CollectionTree" can be read back.

        Reads at most *n_events* entries and divides the number of bytes the
        file actually delivered during that loop by the elapsed time. Using
        the bytes really read (rather than the whole file size) keeps the
        result correct when the tree holds more than *n_events* entries.

        Args:
            input_file: Path of the ROOT file to read.
            n_events: Upper bound on the number of entries to read.

        Returns:
            Reading speed in MB/s (on-disk bytes per second), 0 if no
            measurable time elapsed, or float("inf") if the file cannot be
            opened or does not contain the tree.
        """
        input_f = ROOT.TFile.Open(input_file, "READ")
        # TFile.Open signals failure with a null/zombie handle, not an exception.
        if not input_f or input_f.IsZombie():
            print(f"Error: cannot open {input_file}")
            return float("inf")

        try:
            tree = input_f.Get("CollectionTree")
            if not tree:
                print(f"Error: CollectionTree not found in {input_file}")
                return float("inf")

            n_to_read = min(n_events, tree.GetEntries())
            # Snapshot the counter so bytes read while opening the file
            # (header, keys, tree metadata) are excluded from the measurement.
            bytes_before = input_f.GetBytesRead()
            start_time = time.perf_counter()
            for i in range(n_to_read):
                tree.GetEntry(i)
            time_taken = time.perf_counter() - start_time  # Total time taken
            bytes_read = input_f.GetBytesRead() - bytes_before
        finally:
            # Close the file on every exit path, including exceptions.
            input_f.Close()

        size_read_mb = bytes_read / (1024 * 1024)
        reading_speed = size_read_mb / time_taken if time_taken > 0 else 0  # MB/s

        return reading_speed
    
    # Input ROOT files to benchmark; each must contain a "CollectionTree".
    input_files = [
        "../../../../data24_13p6TeV.00472677.physics_Main.deriv.DAOD_PHYSLITE.f1437_m2243_p6142/DAOD_PHYSLITE.38116464._000027.pool.root.1"
    ]

    # Human-readable name -> ROOT compression algorithm code.
    compression_algorithms = {
        "lzma": ROOT.kLZMA,
        "lz4": ROOT.kLZ4,
        "zlib": ROOT.kZLIB,
        "zstd": ROOT.kZSTD
    }

    compression_levels = [1, 5, 9]

    # CSV logging: start a fresh file and write the header once. The header
    # lists all seven columns that each result row emits below.
    log_file_path = "compression_results.csv"
    with open(log_file_path, "w") as log_file:
        log_file.write("InputFile,Algorithm,Level,FileSizeMB,CompressionTimeSec,ReadSpeedMBps,CompressionRatio\n")

    # Loop over files, algorithms, and compression levels
    for input_file in input_files:
        for algo_name, algo_code in compression_algorithms.items():
            for level in compression_levels:
                # NOTE: the output name does not include the input file, so
                # with several inputs each one overwrites the previous output.
                output_file = f"compressed_{algo_name}_level{level}.root"

                # Compress the file and measure time; None means the
                # compression step failed and there is nothing to measure.
                compression_time = compress_file(input_file, output_file, algo_code, level)
                if compression_time is None:
                    continue

                # Measure file size
                file_size = get_file_size(output_file)

                # Measure reading speed
                reading_speed = measure_reading_speed(output_file)

                # Ratio is output size over input size: smaller is better.
                input_size = get_file_size(input_file)  # Input file size in MB
                compression_ratio = file_size / input_size

                # Append one row per run so results survive a later crash.
                with open(log_file_path, "a") as log_file:
                    log_file.write(f"{input_file},{algo_name},{level},{file_size:.2f},{compression_time:.2f},{reading_speed:.2f},{compression_ratio:.4f}\n")

                print(f"File: {output_file}, Size: {file_size:.2f} MB, Compression Time: {compression_time:.2f} sec, Reading Speed: {reading_speed:.2f} MB/s, Compression Ratio: {compression_ratio:.4f}")
    
    0% Loading or .
    You are about to add 0 people to the discussion. Proceed with caution.
    Please register or to comment