Skip to content
Snippets Groups Projects
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
check_added_files 2.79 KiB
#! /usr/bin/env python

import os, sys, re, glob

from jo_utils import bashExec, checkWhitelist, ALLOWED_PATHS
from jo_utils import printError, printWarning, printOK, printInfo


# Directories for which the jO-count check has already been considered,
# so that it is not repeated for every new file in the same directory
dirsChecked = []

# Directories for which the GRID-file check has already run. This is tracked
# separately from dirsChecked: with a single list, a non-GRID file seen first
# would mark the directory as done and the GRID check would then be skipped
# for any gridpack added in that same directory.
_gridDirsChecked = []


def _checkSingleJO(filedir):
  """Return True (failure) unless exactly one mc.*.py file is in filedir."""
  # os.path.join + glob.escape: a literal directory name is globbed as-is
  pattern = os.path.join(glob.escape(filedir), 'mc.*.py')
  countJOinDSID = len(glob.glob(pattern))
  if countJOinDSID == 1:
    printOK(f'only 1 jO file found in {filedir}')
    return False
  printError(f'{countJOinDSID} jO files found in {filedir}')
  return True


def _checkGridpacks(filedir):
  """Return True (failure) if two GRID files in filedir share a COM energy."""
  # os.path.join keeps the pattern relative when filedir is empty
  # (f'{filedir}/...' would turn into an absolute '/...' pattern).
  pattern = os.path.join(glob.escape(filedir), '*.GRID.tar.gz')
  gridpacks = glob.glob(pattern)

  # The energy token is the text between the first '_' and the first '.' of
  # the file name. It is taken from the basename so that '_' or '.' characters
  # in the directory part of the path cannot shift the slice.
  COMarray = []
  for gridpack in gridpacks:
    name = os.path.basename(gridpack)
    COMarray.append(name[name.find('_') + 1:name.find('.')])

  if len(COMarray) != len(set(COMarray)):
    printError(f'{filedir} contains multiple GRID files with the same COM energy')
    for g in gridpacks:
      print(g)
    return True

  printOK(f"{filedir} doesn't contain multiple GRID files with the same COM energy")
  return False


# function to perform directory level checks
def directoryLevelChecks(newFile):
  """Run the per-directory checks for the directory containing newFile.

  Two independent checks exist, each executed at most once per directory:
    * if the directory name is a 6-digit DSID, it must contain exactly one
      mc.*.py file;
    * if newFile is a GRID file ('GRID.tar.gz' appears in its path), the
      directory must not hold two *.GRID.tar.gz files with the same COM
      energy token.

  Returns True if any check that ran for this call failed, False otherwise
  (including when every applicable check had already been run earlier).
  """
  filedir = os.path.dirname(newFile)
  failed = False

  # DSID-level check, once per directory
  if filedir not in dirsChecked:
    dirsChecked.append(filedir)
    if re.match(r'^[0-9]{6}$', os.path.basename(filedir)):
      failed = _checkSingleJO(filedir)

  # GRID-level check, once per directory, triggered only by a GRID file
  if 'GRID.tar.gz' in newFile and filedir not in _gridDirsChecked:
    _gridDirsChecked.append(filedir)
    failed = _checkGridpacks(filedir) or failed

  return failed


printInfo('Find files that have been added in last commit...')

# List the files that are Added (A) on HEAD relative to origin/master.
# --diff-filter=A keeps only additions; the other git status letters
# (Copied, Deleted, Modified, Renamed, Type-changed, Unmerged, Unknown,
# Broken pairing) are excluded. --find-renames is passed as well, presumably
# so that a renamed file is reported as a rename and not as an addition.
cmd = 'git diff-tree --no-commit-id --name-only -r origin/master..HEAD --diff-filter=A --find-renames'
files = bashExec(cmd).splitlines()

if not files:
  printOK('No files added')
  sys.exit(0)

# Abbreviated hash of the tip of origin/master (first entry of the log)
sha = bashExec('git log --oneline origin/master').splitlines()[0].split()[0]
# Paths that already exist under the *xxx directories at that commit
existingDSIDs = bashExec(rf'git ls-tree -r --name-only {sha} *xxx')

# Directories that already contain files at that commit. An exact-membership
# lookup is used instead of re.match(newdir, existingDSIDs): re.match only
# matches at the very beginning of the whole listing (so only the first listed
# path could ever be detected), treats newdir as an unescaped regex, and
# matches everything when newdir is empty.
existingDirs = {os.path.dirname(path) for path in existingDSIDs.splitlines()}
existingDirs.discard('')  # the repository root is never a DSID directory


fail = False
for newfile in files:

  # Check if file is in whitelist
  if checkWhitelist(newfile):
    printOK(f'file: {newfile} in white list')
  else:
    printError(f'file: {newfile} not in white list')
    fail = True

  # Check if the file has been added in a pre-existing DSID directory
  # (GRID files are exempt from this restriction)
  newdir = os.path.dirname(newfile)
  if newdir in existingDirs and 'GRID.tar.gz' not in newfile:
    printError(f'{newfile} added in pre-existing DSID directory. This is not allowed!')
    fail = True

  # Perform directory-level checks
  fail |= directoryLevelChecks(newfile)

# Exit status 1 if any check failed, 0 otherwise
sys.exit(int(fail))