Commit 63ad563e authored by Domenico Giordano's avatar Domenico Giordano
Browse files

Refine names

parent 03999575
......@@ -44,4 +44,7 @@ This is a guide map of the repository:
## From where to start
Detailed procedures for newcomers (W.I.P.)
1. For a general introduction on this activity see the [ITTF seminar](https://indico.cern.ch/event/1012703/)
1. For interactive examples see [examples](examples)
1. For Airflow deployment see [control_room](control_room)
......@@ -402,7 +402,7 @@ def data_presence(resource_file):
@cli.command()
@click.option('--resource_file', default="",
help="""path to json file defining what to download.""")
def download_data(resource_file):
def transform_data(resource_file):
# PREPARE SPARK
sc, spark, conf = spark_preparation()
# READ RESOURCE FILE
......@@ -422,8 +422,8 @@ def download_data(resource_file):
@cli.command()
@click.option('--resource_file', default="",
help="""path to json file defining what to cache.""")
def cache_locally(resource_file):
"""Cache your data locally (aka move them from spark to local disk)."""
def copy_locally(resource_file):
"""Copy your data locally (aka move them from spark to local disk)."""
# PREPARE SPARK
sc, spark, conf = spark_preparation()
# READ RESOURCE FILE
......
......@@ -102,27 +102,27 @@ def ad_etl_dag(dag_id='give_me_a_name', override_params={}):
)
ad_tasks = [
('local_data_presence', 'data_mining data_presence --resource_file ad_config_file.json', 'all_success'),
('normalization_presence', 'data_mining normalization_presence --resource_file ad_config_file.json', 'all_failed'),
('compute_normalization' , 'data_mining compute_normalization --resource_file ad_config_file.json', 'all_failed'),
('spark_etl_to_hdfs', 'data_mining download_data --resource_file ad_config_file.json', 'one_success'),
('from_hdfs_to_local', 'data_mining cache_locally --resource_file ad_config_file.json', 'all_success'),
('check_local_data_presence', 'data_mining data_presence --resource_file ad_config_file.json', 'all_success'),
('spark_normalization_presence', 'data_mining normalization_presence --resource_file ad_config_file.json', 'all_failed'),
('spark_compute_normalization' , 'data_mining compute_normalization --resource_file ad_config_file.json', 'all_failed'),
('spark_transform_data', 'data_mining transform_data --resource_file ad_config_file.json', 'one_success'),
('spark_mv_data_to_local', 'data_mining copy_locally --resource_file ad_config_file.json', 'all_success'),
]
for atask in ad_tasks:
globals()[atask[0]] = return_configured_BashOperator(*atask)
# Start checking the local data presence and in case break pipeline
local_data_presence >> dag_exit_status
check_local_data_presence >> dag_exit_status
# Otherwise if datapresence fails, check the normalization presence
local_data_presence >> normalization_presence
check_local_data_presence >> spark_normalization_presence
# If the normalization is missing, compute it and then transform the data
normalization_presence >> compute_normalization >> spark_etl_to_hdfs
spark_normalization_presence >> spark_compute_normalization >> spark_transform_data
# If the normalization presence check succeeds, start transforming the data immediately
normalization_presence >> spark_etl_to_hdfs
# Finally cache data locally (#FIXME what does it mean?)
spark_etl_to_hdfs >> from_hdfs_to_local
spark_normalization_presence >> spark_transform_data
# Finally move data to local folder (can also be eos)
spark_transform_data >> spark_mv_data_to_local
from_hdfs_to_local >> dag_exit_status
spark_mv_data_to_local >> dag_exit_status
return dag
......@@ -72,10 +72,10 @@ echo -e "\n@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"
echo -e "\ntest data_mining downloaddata train\n"
echo -e "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n"
data_mining download_data --resource_file /work/tests/adcern/integration/adcern_cfg_train.json || fail 'test data_mining downloaddata'
data_mining transform_data --resource_file /work/tests/adcern/integration/adcern_cfg_train.json || fail 'test data_mining downloaddata'
echo -e "\n@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"
echo -e "\ntest data_mining downloaddata test\n"
echo -e "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n"
data_mining download_data --resource_file /work/tests/adcern/integration/adcern_cfg_test.json || fail 'test data_mining downloaddata'
data_mining transform_data --resource_file /work/tests/adcern/integration/adcern_cfg_test.json || fail 'test data_mining downloaddata'
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment