From 32594fe6843818106ac4fd0218fe3cd9050540f0 Mon Sep 17 00:00:00 2001
From: Frank Winklmeier <fwinkl@cern>
Date: Wed, 2 Dec 2020 15:39:28 +0100
Subject: [PATCH] PyUtils: add script to derive python package dependencies

Add `apydep.py` script that can be used to extract python dependencies
between packages and store them in a DOT graph. This is analogous to the
graphviz output of cmake.

In release building mode, run the script on the current source tree and
install the resulting `packages.py.dot`.
---
 Tools/PyUtils/CMakeLists.txt |  18 ++++-
 Tools/PyUtils/bin/apydep.py  | 132 +++++++++++++++++++++++++++++++++++
 2 files changed, 149 insertions(+), 1 deletion(-)
 create mode 100755 Tools/PyUtils/bin/apydep.py

diff --git a/Tools/PyUtils/CMakeLists.txt b/Tools/PyUtils/CMakeLists.txt
index a2035f304db..b0934d01792 100644
--- a/Tools/PyUtils/CMakeLists.txt
+++ b/Tools/PyUtils/CMakeLists.txt
@@ -19,7 +19,7 @@ atlas_install_scripts( bin/acmd.py bin/checkFile.py bin/checkPlugins.py
    bin/diffPoolFiles.py bin/dlldep.py bin/dso-stats.py bin/dump-athfile.py
    bin/dumpAthfilelite.py bin/filter-and-merge-d3pd.py bin/getMetadata.py
    bin/gprof2dot bin/issues bin/magnifyPoolFile.py bin/merge-poolfiles.py
-   bin/pool_extractFileIdentifier.py
+   bin/apydep.py bin/pool_extractFileIdentifier.py
    bin/pool_insertFileToCatalog.py bin/print_auditor_callgraph.py bin/pyroot.py
    bin/vmem-sz.py bin/meta-reader.py bin/meta-diff.py bin/tree-orderer.py
    POST_BUILD_CMD ${ATLAS_FLAKE8} )
@@ -54,3 +54,19 @@ atlas_add_test( RootUtils
 
 atlas_add_test( fprint_test
                 SCRIPT python -m PyUtils.fprint )
+
+
+# Create python package dependencies in release building mode.
+# Used as input for `acmd.py cmake depends`:
+if( ATLAS_RELEASE_MODE )
+
+   set( _pydot ${CMAKE_CURRENT_BINARY_DIR}/packages.py.dot )
+   add_custom_command( OUTPUT ${_pydot}
+      COMMAND ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/atlas_build_run.sh
+      ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/apydep.py -o ${_pydot} ${CMAKE_SOURCE_DIR}/../../ )
+
+   add_custom_target( build_pydeps ALL DEPENDS ${_pydot} )
+
+   # Install output if available:
+   install( FILES ${_pydot} DESTINATION . OPTIONAL )
+endif()
diff --git a/Tools/PyUtils/bin/apydep.py b/Tools/PyUtils/bin/apydep.py
new file mode 100755
index 00000000000..c8778245935
--- /dev/null
+++ b/Tools/PyUtils/bin/apydep.py
@@ -0,0 +1,132 @@
+#!/usr/bin/env python3
+# Copyright (C) 2002-2020 CERN for the benefit of the ATLAS collaboration
+#
+# Created: Oct 2020, Frank Winklmeier
+#
+"""
+Extract Python dependencies between packages and create DOT graph.
+Both `import` and `include` dependencies are considered.
+"""
+
+import ast
+import sys
+import os
+import argparse
+import pygraphviz
+from collections import defaultdict
+
+class DependencyFinder(ast.NodeVisitor):
+   """Walk an AST collecting import/include statements."""
+
+   def __init__(self):
+      self.imports = set()
+      self.includes = set()
+
+   def visit_Import(self, node):
+      """import XYZ"""
+      self.imports.update(alias.name.split('.',1)[0] for alias in node.names)
+
+   def visit_ImportFrom(self, node):
+      """from XYZ import ABC"""
+      if node.level==0:   # ignore relative imports
+         self.imports.add(node.module.split('.',1)[0])
+
+   def visit_Call(self, node):
+      """include(XYZ/ABC.py)"""
+      if isinstance(node.func, ast.Name) and node.func.id=='include' and node.args:
+         if isinstance(node.args[0], ast.Str):
+            self.includes.add(node.args[0].s.split('/',1)[0])
+
+
+def get_dependencies(filename, print_error=False):
+   """Get all the imports/includes in a file."""
+
+   try:
+      tree = ast.parse(open(filename,'rb').read(), filename=filename)
+   except SyntaxError as e:
+      if print_error:
+         print(e, file=sys.stderr)
+      return DependencyFinder()
+
+   finder = DependencyFinder()
+   finder.visit(tree)
+
+   return finder
+
+
+def walk_tree(path='./', print_error=False, filterFnc=None):
+   """Walk the source tree and extract python dependencies, filtered by filterFnc"""
+
+   pkg = 'UNKNOWN'
+   deps = defaultdict(lambda : defaultdict(set))
+   for root, dirs, files in os.walk(path):
+      if 'CMakeLists.txt' in files:
+         pkg = os.path.basename(root)
+
+      if (filterFnc and not filterFnc(pkg)):
+         continue
+
+      for f in filter(lambda p : os.path.splitext(p)[1]=='.py', files):
+         d = get_dependencies(os.path.join(root,f), print_error)
+         deps[pkg]['import'].update(d.imports)
+         deps[pkg]['include'].update(d.includes)
+
+   return deps
+
+
+def make_graph(deps, filterFnc=None):
+   """Create a dot graph from the dependencies, nodes filtered by filterFnc"""
+
+   graph = pygraphviz.AGraph(name='AthPyGraph', directed=True)
+   for a in deps:
+      for t in ['import','include']:
+         graph.add_edges_from(((a,b) for b in deps[a][t]
+                               if a!=b and (filterFnc is None or (filterFnc(a) and filterFnc(b)))),
+                               label = t)
+   return graph
+
+
+def main():
+   parser = argparse.ArgumentParser(description=__doc__)
+
+   parser.add_argument('path', metavar='DIRECTORY', nargs='?', default='./',
+                       help='root of source tree [%(default)s]')
+
+   parser.add_argument('-o', '--output', metavar='FILE', type=str,
+                       help='output file for DOT graph')
+
+   parser.add_argument('-p', '--packages', metavar='FILE', type=str,
+                       help='path to packages.txt file [from release]')
+
+   parser.add_argument('-a', '--all', action='store_true',
+                       help='include non-athena dependencies')
+
+   parser.add_argument('-v', '--verbose', action='store_true',
+                       help='print parse errors')
+
+   args = parser.parse_args()
+
+   packages = None
+   if not args.all:
+      package_file = args.packages or os.path.join(os.environ['AtlasArea'],'InstallArea',
+                                                   os.environ['BINARY_TAG'],'packages.txt')
+
+      try:
+         with open(package_file) as f:
+            packages = set(line.rstrip().split('/')[-1] for line in f if not line.startswith('#'))
+      except FileNotFoundError:
+         parser.error(f"Cannot read '{package_file}'. Specify via '-p/--packages' or run with '-a/--all'")
+
+   # By default only show athena packages:
+   filterFnc = None if args.all else lambda p : p in packages
+
+   # Walk source tree and create DOT graph:
+   g = make_graph(walk_tree(args.path, args.verbose, filterFnc), filterFnc)
+
+   if args.output:
+      g.write(args.output)
+   else:
+      print(g)
+
+if __name__ == "__main__":
+   sys.exit(main())
-- 
GitLab