From 5e05efce9de3a923b3ce877dfce751dc5e49cb0c Mon Sep 17 00:00:00 2001
From: Roel Aaij <roel.aaij@cern.ch>
Date: Thu, 29 Feb 2024 14:48:29 +0100
Subject: [PATCH] Better diagnostics for TCK tests

---
 Rec/Allen/python/Allen/qmtest/utils.py        | 27 ++++++------
 Rec/Allen/tests/options/compare_hlt1_tcks.py  | 42 +++++++++++++++----
 .../options/test_tck_allen_write_config.py    |  8 ++--
 .../tests/qmtest/compare_tck_allen_config.qmt | 20 ++++++---
 Rec/Allen/tests/qmtest/compare_tcks.qmt       | 12 +++---
 5 files changed, 71 insertions(+), 38 deletions(-)

diff --git a/Rec/Allen/python/Allen/qmtest/utils.py b/Rec/Allen/python/Allen/qmtest/utils.py
index af433d6e40a..441953b51f9 100644
--- a/Rec/Allen/python/Allen/qmtest/utils.py
+++ b/Rec/Allen/python/Allen/qmtest/utils.py
@@ -9,6 +9,9 @@
 # or submit itself to any jurisdiction.                                       #
 ###############################################################################
 from collections import defaultdict
+from io import StringIO
+import json
+import difflib
 
 
 def good_sequence(s):
@@ -20,18 +23,12 @@ def good_sequence(s):
     return (physics or extra) and not bad
 
 
-def print_sequence_differences(a, b):
-    diff_keys = set(a.keys()).symmetric_difference(set(b.keys()))
-    diff = defaultdict(dict)
-    ka = [k for k in a.keys() if k not in diff_keys]
-    for k in ka:
-        props_a = a[k]
-        props_b = b[k]
-        diff_prop_keys = set(props_a.keys()).symmetric_difference(
-            set(props_b.keys()))
-        pka = [k for k in props_a.keys() if k not in diff_prop_keys]
-        for prop_key in pka:
-            if props_a[prop_key] != props_b[prop_key]:
-                diff[k][prop_key] = (props_a[prop_key], props_b[prop_key])
-
-    return dict(diff)
+def sequence_differences(a, b, fromfile, tofile):
+    io = StringIO()
+    json.dump(a, io, indent=4)
+    lines_json = [l + '\n' for l in io.getvalue().split('\n')]
+    io = StringIO()
+    json.dump(b, io, indent=4)
+    lines_python = [l + '\n' for l in io.getvalue().split('\n')]
+    return difflib.unified_diff(
+        lines_json, lines_python, fromfile=fromfile, tofile=tofile)
diff --git a/Rec/Allen/tests/options/compare_hlt1_tcks.py b/Rec/Allen/tests/options/compare_hlt1_tcks.py
index 54828bece5f..c7cc81f1c96 100644
--- a/Rec/Allen/tests/options/compare_hlt1_tcks.py
+++ b/Rec/Allen/tests/options/compare_hlt1_tcks.py
@@ -22,7 +22,7 @@ configuration.
 import os
 import sys
 import json
-from Allen.qmtest.utils import print_sequence_differences
+from Allen.qmtest.utils import sequence_differences
 from Allen.tck import manifest_from_git, sequence_from_git
 from pathlib import Path
 
@@ -37,17 +37,26 @@ manifest_python = manifest_from_git(python_repo)
 entries_json = sorted(manifest_json.values(), key=lambda v: v["TCK"])
 entries_python = sorted(manifest_python.values(), key=lambda v: v["TCK"])
 
+# Remove digest to avoid hard-to-understand differences
+for entries in (entries_json, entries_python):
+    for e in entries:
+        e['metadata'].pop('digest')
+
 error = entries_json != entries_python
 if error:
     print("ERROR: Manifests are not the same")
-
-for m, suf in ((manifest_json, "json"), (manifest_python, "python")):
-    with open(f"manifest_{suf}.json", "w") as f:
-        json.dump(m, f)
+    with open("manifest.diff", "w") as f:
+        f.writelines(
+            sequence_differences(
+                entries_json,
+                entries_python,
+                fromfile="manifest_json",
+                tofile="manifest_json"))
 
 for info in entries_json:
     sequence_json = json.loads(sequence_from_git(json_repo, info["TCK"])[0])
-    sequence_python = json.loads(sequence_from_git(json_repo, info["TCK"])[0])
+    sequence_python = json.loads(
+        sequence_from_git(python_repo, info["TCK"])[0])
     sequence_type = next(v for v in info["Release2Type"].values())
     sequence_direct = None
     tck = info["TCK"]
@@ -60,15 +69,30 @@ for info in entries_json:
 
     if sequence_json != sequence_python:
         print(
-            f"ERROR: sequences loaded from JSON and python git repos for TCK {tck} are not the same"
+            f"ERROR: sequences loaded from JSON and Python git repos for TCK {tck} are not the same"
         )
+        with open(f"json_python_{tck}.diff", "w") as f:
+            f.writelines(
+                sequence_differences(sequence_json, sequence_python,
+                                     f"{tck}_json", f"{tck}_python"))
         error = True
     if sequence_json != sequence_direct:
         print(
             f"ERROR: sequences loaded directly from JSON and from JSON git repo for {tck} are not the same"
         )
-
-        print_sequence_differences(sequence_direct, sequence_json)
+        with open(f"direct_json_{tck}.diff", "w") as f:
+            f.writelines(
+                sequence_differences(sequence_direct, sequence_json,
+                                     f"{tck}_direct", f"{tck}_json"))
+        error = True
+    if sequence_python != sequence_direct:
+        print(
+            f"ERROR: sequences loaded directly from JSON and from Python git repo for {tck} are not the same"
+        )
+        with open(f"direct_python_{tck}.diff", "w") as f:
+            f.writelines(
+                sequence_differences(sequence_direct, sequence_python,
+                                     f"{tck}_direct", f"{tck}_python"))
         error = True
 
 sys.exit(error)
diff --git a/Rec/Allen/tests/options/test_tck_allen_write_config.py b/Rec/Allen/tests/options/test_tck_allen_write_config.py
index 10ab50cd342..ac03b2f4e3f 100644
--- a/Rec/Allen/tests/options/test_tck_allen_write_config.py
+++ b/Rec/Allen/tests/options/test_tck_allen_write_config.py
@@ -23,7 +23,7 @@ import os
 import json
 from pathlib import Path
 from subprocess import PIPE, run
-from Allen.qmtest.utils import print_sequence_differences
+from Allen.qmtest.utils import sequence_differences
 from Allen.tck import manifest_from_git, sequence_from_git
 
 tck_repo = Path(os.getenv("PREREQUISITE_0", "")) / "config_json.git"
@@ -73,8 +73,10 @@ for info in manifest_entries:
 
         # Compare configurations
         if allen_sequence != tck_sequence:
-            diffs = print_sequence_differences(tck_sequence, allen_sequence)
             print(
                 "Differences between input configuration from TCK and written by Allen:"
             )
-            print(diffs)
+            with open(f"allen_{tck}.diff", "w") as f:
+                f.writelines(
+                    sequence_differences(allen_sequence, tck_sequence,
+                                         f"{tck}_allen", f"{tck}"))
diff --git a/Rec/Allen/tests/qmtest/compare_tck_allen_config.qmt b/Rec/Allen/tests/qmtest/compare_tck_allen_config.qmt
index 1a7a04d8584..2aceabc8b80 100644
--- a/Rec/Allen/tests/qmtest/compare_tck_allen_config.qmt
+++ b/Rec/Allen/tests/qmtest/compare_tck_allen_config.qmt
@@ -2,12 +2,12 @@
 <!--
     (c) Copyright 2020 CERN for the benefit of the LHCb Collaboration
 
-    This software is distributed under the terms of the Apache License          
-    version 2 (Apache-2.0), copied verbatim in the file "LICENSE".             
-                                                                            
+    This software is distributed under the terms of the Apache License
+    version 2 (Apache-2.0), copied verbatim in the file "LICENSE".
+
     In applying this licence, CERN does not waive the privileges and immunities
-    granted to it by virtue of its status as an Intergovernmental Organization  
-    or submit itself to any jurisdiction.  
+    granted to it by virtue of its status as an Intergovernmental Organization
+    or submit itself to any jurisdiction.
 -->
 <!--
 #######################################################
@@ -28,4 +28,14 @@
   </set></argument>
   <argument name="timeout"><integer>600</integer></argument>
   <argument name="use_temp_dir"><enumeral>per-test</enumeral></argument>
+<argument name="validator"><text>
+
+# No validator for now: only check the exit code
+
+import glob
+workdir = self._common_tmpdir
+for fn in glob.glob(workdir + "/*.diff"):
+    result[os.path.basename(fn)] = open(fn).read()
+
+</text></argument>
 </extension>
diff --git a/Rec/Allen/tests/qmtest/compare_tcks.qmt b/Rec/Allen/tests/qmtest/compare_tcks.qmt
index bf6aad770e0..deea74f32f5 100644
--- a/Rec/Allen/tests/qmtest/compare_tcks.qmt
+++ b/Rec/Allen/tests/qmtest/compare_tcks.qmt
@@ -2,12 +2,12 @@
 <!--
     (c) Copyright 2020 CERN for the benefit of the LHCb Collaboration
 
-    This software is distributed under the terms of the Apache License          
-    version 2 (Apache-2.0), copied verbatim in the file "LICENSE".             
-                                                                            
+    This software is distributed under the terms of the Apache License
+    version 2 (Apache-2.0), copied verbatim in the file "LICENSE".
+
     In applying this licence, CERN does not waive the privileges and immunities
-    granted to it by virtue of its status as an Intergovernmental Organization  
-    or submit itself to any jurisdiction.  
+    granted to it by virtue of its status as an Intergovernmental Organization
+    or submit itself to any jurisdiction.
 -->
 <!--
 #######################################################
@@ -34,7 +34,7 @@
 
 import glob
 workdir = self._common_tmpdir
-for fn in glob.glob(workdir + "/*.json"):
+for fn in glob.glob(workdir + "/*.diff"):
     result[os.path.basename(fn)] = open(fn).read()
 
 </text></argument>
-- 
GitLab