From 1f173506733d456572a5f6ba7b9537ecd6215a2b Mon Sep 17 00:00:00 2001 From: Jakub Wozniak <jakub.wozniak@cern.ch> Date: Wed, 10 Apr 2024 12:00:02 +0200 Subject: [PATCH] Revert "Merge branch 'NXCALS-7436-Race-condition-when-starting-multiple-python' into 'develop'" This reverts merge request !1960 --- .../templates/config/spark-env.sh | 159 ++++++++---------- 1 file changed, 66 insertions(+), 93 deletions(-) diff --git a/ansible/roles/python-wheels-upload/templates/config/spark-env.sh b/ansible/roles/python-wheels-upload/templates/config/spark-env.sh index a5a22f3c69..fcb99ffdf4 100644 --- a/ansible/roles/python-wheels-upload/templates/config/spark-env.sh +++ b/ansible/roles/python-wheels-upload/templates/config/spark-env.sh @@ -72,23 +72,6 @@ # In order to determine that we check the spark-default.conf file and script input arguments ($@). # If you use spark-submit and set this inside the Python script we are unable to determine that. (jwozniak) -NXCALS_VERSION="{{nxcals_version}}" - -#Global error handling -handle_error() { - echo "An error occurred on line $1" - exit 1 -} -trap 'handle_error $LINENO' ERR - -if command -v flock &> /dev/null; then - FLOCK="flock -x 100" - else - echo "flock is not installed, race condition possible if running multiple scripts for a single venv in parallel!" - FLOCK= -fi - - function md5_cmd { if [[ $(uname) == "Darwin" ]]; then md5 @@ -97,7 +80,7 @@ function md5_cmd { fi } - +NXCALS_VERSION="{{nxcals_version}}" function get_writable_dir() { dir=$(dirname "$(mktemp -u)") @@ -114,43 +97,6 @@ if [ ! "$NXCALS_WORKSPACE_TEMP_DIR" ]; then get_writable_dir fi -function pack_venv() { - if [ ! "$NXCALS_PACK_ALL_PACKAGES" ]; then - venv-pack --python-prefix "$PYTHON_PREFIX" --output "$PACKED_VENV_FILE" \ - --exclude nxcals-bundle/nxcals_jars/\* --exclude nxcals-bundle/jars/\* --exclude nxcals-bundle/examples/\* \ - --exclude \*/pyspark/jars/\* --exclude \*/pyspark/examples/\* - else - venv-pack --python-prefix "$PYTHON_PREFIX" --output "$PACKED_VENV_FILE" - fi -} - -function fix_venv() { - echo "Extracing packed venv to fix symlink to exec..." - # Fix packed venv - symlinks to python exec may be broken - FIXED_VENV_DIR="$NXCALS_WORKSPACE_TEMP_DIR/venv" - mkdir -p $FIXED_VENV_DIR - tar -xzf $NXCALS_WORKSPACE_TEMP_DIR/nxcals-python3-env.tar.gz -C $FIXED_VENV_DIR - - echo "Fixing symlink to exec in venv..." - - for file in $FIXED_VENV_DIR/bin/python*; do - if [ -L "$file" ]; then - newTarget=$PYTHON_PREFIX/bin/python$PYTHON_VERSION - rm $file - ln -s $newTarget $file - fi - done - - echo "Packing again venv..." - OLD_PWD=`pwd` - rm $NXCALS_WORKSPACE_TEMP_DIR/nxcals-python3-env.tar.gz - cd $FIXED_VENV_DIR - tar -czf $NXCALS_WORKSPACE_TEMP_DIR/nxcals-python3-env.tar.gz ./* - cd $OLD_PWD - rm -r $FIXED_VENV_DIR -} - - #A must as the pySpark is using those 2 variables to set the python on the executor. Both vars must be set. The driver uses what is the current python3 and the executor must use the LCG. #Exlusion is only for the jupyter setting - it shouldn't be overwritten JUPYTER_PYSPARK_REGEX='^\s*jupyter(\s.*|$)' @@ -166,7 +112,6 @@ OLD_SPARK_CONF_DIR="$SPARK_CONF_DIR" export SPARK_CONF_DIR="$NXCALS_WORKSPACE_TEMP_DIR/conf" PACKED_VENV_FILE="${NXCALS_WORKSPACE_TEMP_DIR}"/{{spark_packed_venv_name}} -LOCK="${NXCALS_WORKSPACE_TEMP_DIR}"/.lock echo "ENVIRONMENT:" echo "NXCALS_VERSION=${NXCALS_VERSION}" @@ -179,7 +124,6 @@ echo "PACKED_VENV_FILE=${PACKED_VENV_FILE}" echo "PYSPARK_PYTHON=${PYSPARK_PYTHON}" echo "PYSPARK_DRIVER_PYTHON=${PYSPARK_DRIVER_PYTHON}" echo "PYSPARK_DRIVER_PYTHON_OPTS=${PYSPARK_DRIVER_PYTHON_OPTS}" -echo "LOCK=${LOCK}" echo echo "IMPORTANT:" echo "Rebuilding of the packed venv is required in cases there are new or modified packages provided by a user. In order to recreate the packed venv please:" @@ -188,41 +132,39 @@ echo " - execute your script again (rebuild will be performed during the echo "Target directory can be set with an env variable NXCALS_WORKSPACE_TEMP_DIR (if not set a temp dir will be used)." echo "Adding NXCALS related files to packed venv can be enabled by setting NXCALS_PACK_ALL_PACKAGES with any value." +if [ ! -e "$NXCALS_WORKSPACE_TEMP_DIR/conf/spark-defaults.conf" ]; then + echo "Copying $SPARK_DEFAULTS to $SPARK_CONF_DIR ..." -( - $FLOCK - if [ ! -e "$NXCALS_WORKSPACE_TEMP_DIR/conf/spark-defaults.conf" ]; then - echo "Copying $SPARK_DEFAULTS to $SPARK_CONF_DIR ..." - mkdir -p "$SPARK_CONF_DIR" + mkdir -p "$SPARK_CONF_DIR" - cp "$OLD_SPARK_CONF_DIR"/spark-defaults.conf "$SPARK_CONF_DIR" - cp "$OLD_SPARK_CONF_DIR"/log4j2.properties "$SPARK_CONF_DIR" - NEW_SPARK_CONF="$SPARK_CONF_DIR/spark-defaults.conf" + if [ $? -ne 0 ]; then + echo "ERROR: could not create $NXCALS_WORKSPACE_TEMP_DIR/conf. Aborting." + exit 1 + fi - # make the spark.jars path absolute otherwise they are relative to the current working directory - # Mac OSX requires escaping brackets + cp "$OLD_SPARK_CONF_DIR"/* "$SPARK_CONF_DIR" + NEW_SPARK_CONF="$SPARK_CONF_DIR/spark-defaults.conf" + # make the spark.jars path absolute otherwise they are relative to the current working directory + # Mac OSX requires escaping brackets + if [[ $(uname) == "Darwin" ]]; then + sed -i -r 's,\([^/]\)nxcals_jars/\([^,]*\),\1'"$SPARK_HOME"'/nxcals_jars/\2,g' "$NEW_SPARK_CONF" + else + sed -i -r 's,([^/])nxcals_jars/([^,]*),\1'"$SPARK_HOME"'/nxcals_jars/\2,g' "$NEW_SPARK_CONF" + fi - if [[ $(uname) == "Darwin" ]]; then - sed -i -r 's,\([^/]\)nxcals_jars/\([^,]*\),\1'"$SPARK_HOME"'/nxcals_jars/\2,g' "$NEW_SPARK_CONF" - else - sed -i -r 's,([^/])nxcals_jars/([^,]*),\1'"$SPARK_HOME"'/nxcals_jars/\2,g' "$NEW_SPARK_CONF" - fi + # Replace the placeholder for the virtual_env path in spark-defaults.conf + # The archive is specified with '#environment' because that is how spark + # knows where to unzip it on the executors under a new directory called environment. + # For further information: http://spark.apache.org/docs/latest/api/python/user_guide/python_packaging.html - # Replace the placeholder for the virtual_env path in spark-defaults.conf - # The archive is specified with '#environment' because that is how spark - # knows where to unzip it on the executors under a new directory called environment. - # For further information: http://spark.apache.org/docs/latest/api/python/user_guide/python_packaging.html - sed -i -r 's@spark.yarn.dist.archives.*@spark.yarn.dist.archives '"$NXCALS_WORKSPACE_TEMP_DIR"'/'{{spark_packed_venv_name}}'#'{{spark_bundle_pyspark_venv_name}}'@g' "$NEW_SPARK_CONF" - fi -) 100>$LOCK + sed -i -r 's@spark.yarn.dist.archives.*@spark.yarn.dist.archives '"$NXCALS_WORKSPACE_TEMP_DIR"'/'{{spark_packed_venv_name}}'#'{{spark_bundle_pyspark_venv_name}}'@g' "$NEW_SPARK_CONF" +fi echo "Trying to determine YARN usage to make Python work correctly (conf/spark-env.sh)..." +grep -q -e "^\s*spark.master\s*yarn" "${SPARK_CONF_DIR}/spark-defaults.conf" -exit_code=0 -grep -q -e "^\s*spark.master\s*yarn" "${SPARK_CONF_DIR}/spark-defaults.conf" || exit_code=$? - -if [[ "${exit_code}" == "0" || $@ =~ .*master.*yarn.* ]]; then +if [[ $? == '0' || $@ =~ .*master.*yarn.* ]]; then echo "Using YARN" # Normally $PYSPARK_PYTHON is set in the spark_session_builder.get_or_create(). # But when user calls pyspark directly, this option is required. @@ -238,17 +180,48 @@ if [[ "${exit_code}" == "0" || $@ =~ .*master.*yarn.* ]]; then echo "ERROR: YARN cluster doesn't support Python in version $PYTHON_VERSION. Supported versions are either 3.9 or 3.11" exit 1 else - ( - $FLOCK - if [[ ! -e "$PACKED_VENV_FILE" ]]; then - echo "Creating packed venv..." - pack_venv - fix_venv - echo -e "Packed venv created.\n...done!" - else - echo "Packed venv already built..." + echo "Creating packed venv..." + + ERROR=0 + + if [ ! "$NXCALS_PACK_ALL_PACKAGES" ]; then + venv-pack --python-prefix "$PYTHON_PREFIX" --output "$PACKED_VENV_FILE" \ + --exclude nxcals-bundle/nxcals_jars/\* --exclude nxcals-bundle/jars/\* --exclude nxcals-bundle/examples/\* \ + --exclude \*/pyspark/jars/\* --exclude \*/pyspark/examples/\* || ERROR=1 + else + venv-pack --python-prefix "$PYTHON_PREFIX" --output "$PACKED_VENV_FILE" || ERROR=1 + fi + + echo "INFO: Extracing packed venv to fix symlink to exec..." + # Fix packed venv - symlinks to python exec may be broken + FIXED_VENV_DIR="$NXCALS_WORKSPACE_TEMP_DIR/venv" + mkdir $FIXED_VENV_DIR + tar -xzf $NXCALS_WORKSPACE_TEMP_DIR/nxcals-python3-env.tar.gz -C $FIXED_VENV_DIR || ERROR=1 + + echo "INFO: Fixing symlink to exec in venv..." + + for file in $FIXED_VENV_DIR/bin/python*; do + if [ -L "$file" ]; then + newTarget=$PYTHON_PREFIX/bin/python$PYTHON_VERSION + rm $file || ERROR=1 + ln -s $newTarget $file || ERROR=1 fi - ) 100>$LOCK + done + + echo "INFO: Packing again venv..." + OLD_PWD=`pwd` + rm $NXCALS_WORKSPACE_TEMP_DIR/nxcals-python3-env.tar.gz + cd $FIXED_VENV_DIR + tar -czf $NXCALS_WORKSPACE_TEMP_DIR/nxcals-python3-env.tar.gz ./* || ERROR=1 + cd $OLD_PWD + rm -r $FIXED_VENV_DIR + + if [ $ERROR -ne 0 ]; then + echo "ERROR: could not create packed venv. Aborting." + exit 3 + else + echo -e "Packed venv created.\n...done!" + fi fi fi echo "IMPORTANT:" -- GitLab