Commit 159daa69 authored by Domenico Giordano's avatar Domenico Giordano
Browse files

improve doc

parent dc7594df
......@@ -100,7 +100,9 @@ def ad_etl_dag(dag_id='give_me_a_name', override_params={}):
trigger_rule=trigger_rule,
dag=dag
)
# List of tasks to define based on a BashOperator
# Each entry includes the tuple: (task_id, step of the AD pipeline to call from inside the BashOperator script, trigger rule)
ad_tasks = [
('check_local_data_presence', 'data_mining data_presence --resource_file ad_config_file.json', 'all_success'),
('spark_normalization_presence', 'data_mining normalization_presence --resource_file ad_config_file.json', 'all_failed'),
......@@ -109,9 +111,12 @@ def ad_etl_dag(dag_id='give_me_a_name', override_params={}):
('spark_mv_data_to_local', 'data_mining copy_locally --resource_file ad_config_file.json', 'all_success'),
]
# Instantiate the AD tasks
for atask in ad_tasks:
globals()[atask[0]] = return_configured_BashOperator(*atask)
# Define the task dependency flow
# Start by checking the local data presence; if the check fails, break the pipeline
check_local_data_presence >> dag_exit_status
# Otherwise, if the data presence check fails, check the normalization presence
......
......@@ -52,7 +52,6 @@ echo -e "\nVerification: running_image is $running_image\n"
cat > docker_run_script.sh <<'EOF'
#!/bin/bash
env
whoami;
pwd;
id
export KRB5CCNAME
......@@ -61,11 +60,6 @@ echo ${config_base_64} | base64 -d > ad_config_file.json;
cat ad_config_file.json;
klist -c {{var.value.KRB5CCNAME}};
local_cache_folder=`python -c "import json; f=open(\"ad_config_file.json\"); print(json.loads(f.read())[\"local_cache_folder\"])"`
echo "local_cache_folder is $local_cache_folder"
#mkdir -p $local_cache_folder;
#[[ "$?" -ne 0 ]] && exit 1;
source set_spark_analytix.sh;
{{params.ad_task}};
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment