diff --git a/property-pull.py b/property-pull.py index 4ec678dfd481b618c44d2a1d3845455fc74273c7..ea23c8b6797bb49a1127b36c0e0a856b2d08f2cf 100644 --- a/property-pull.py +++ b/property-pull.py @@ -14,8 +14,8 @@ from pyspark.sql.types import IntegerType import numpy as np -# deprecated, used for file storing _dest_dir = '.' +_files = False # mysqlDB variables mydb = mysql.connector.connect( @@ -139,10 +139,13 @@ def pull_histogram(_start_time, _end_time, _device): + _end_time.replace(' ', '_') + '_' + _property + '.dat', 'w') as outfile: json.dump(data, outfile) - print(_dest_dir + '/' + _device + '_' + _start_time.replace(' ', '_') + 'to' - + _end_time.replace(' ', '_') + '_' + _property + '.dat' + " Pulled") + if _files: + print(_dest_dir + '/' + _device + '_' + _start_time.replace(' ', '_') + 'to' + + _end_time.replace(' ', '_') + '_' + _property + '.dat' + " Pulled") +# Java Heap Space really common on realFill > 10h + def pull_integral(_start_time, _end_time, _device): _property = "LoggingIntegral" @@ -224,8 +227,9 @@ def pull_integral(_start_time, _end_time, _device): + _end_time.replace(' ', '_') + '_' + _property + '.dat', 'w') as outfile: json.dump(data, outfile) - print(_dest_dir + '/' + _device + '_' + _start_time.replace(' ', '_') + 'to' - + _end_time.replace(' ', '_') + '_' + _property + '.dat' + " Pulled") + if _files: + print(_dest_dir + '/' + _device + '_' + _start_time.replace(' ', '_') + 'to' + + _end_time.replace(' ', '_') + '_' + _property + '.dat' + " Pulled") # Not Working with too mny dat (dump memory) @@ -291,8 +295,9 @@ def pull_raw_dist(_start_time, _end_time, _device): + _end_time.replace(' ', '_') + '_' + _property + '.dat', 'w') as outfile: json.dump(data, outfile) - print(_dest_dir + '/' + _device + '_' + _start_time.replace(' ', '_') + 'to' - + _end_time.replace(' ', '_') + '_' + _property + '.dat' + " Pulled") + if _files: + print(_dest_dir + '/' + _device + '_' + _start_time.replace(' ', '_') + 'to' + + _end_time.replace(' ', '_') + '_' 
+ _property + '.dat' + " Pulled") def pull_integral_dist(_start_time, _end_time, _device): @@ -358,8 +363,9 @@ def pull_integral_dist(_start_time, _end_time, _device): + _end_time.replace(' ', '_') + '_' + _property + '.dat', 'w') as outfile: json.dump(data, outfile) - print(_dest_dir + '/' + _device + '_' + _start_time.replace(' ', '_') + 'to' - + _end_time.replace(' ', '_') + '_' + _property + '.dat' + " Pulled") + if _files: + print(_dest_dir + '/' + _device + '_' + _start_time.replace(' ', '_') + 'to' + + _end_time.replace(' ', '_') + '_' + _property + '.dat' + " Pulled") def pull_turnloss(_start_time, _end_time, _device): @@ -444,43 +450,54 @@ def pull_turnloss(_start_time, _end_time, _device): + _end_time.replace(' ', '_') + '_' + _property + '.dat', 'w') as outfile: json.dump(data, outfile) - print(_dest_dir + '/' + _device + '_' + _start_time.replace(' ', '_') + 'to' - + _end_time.replace(' ', '_') + '_' + _property + '.dat' + " Pulled") + if _files: + print(_dest_dir + '/' + _device + '_' + _start_time.replace(' ', '_') + 'to' + + _end_time.replace(' ', '_') + '_' + _property + '.dat' + " Pulled") if __name__ == '__main__': + _fill_name = "7340" _mode = "STABLE" _start = "2018-10-22 21:18:00.000" _end = "2018-10-23 11:18:00.000" _device = "BLMED.06L7.B1B30_TCPA.A6L7.B1" + _files = False if len(sys.argv) > 1 and sys.argv[1] == '-h': - print("-- HELP -- \n" - "-- TODO -- \n" - "- ------ -") + print(" -- HELP -- \n" + " -- TODO -- \n" + "-args- \n" + " <fillName> : the name of the fill you are pulling, a fill is a complete experience. " + "In one fill there is multiples device, modes, time measurements ... " + "So you'll need to do multiples pulling for the same fill \n" + " <device> : the name of the device you want to pull. If you want to pull multiples device at once," + " enter devices in quotes, separated by spaces \n" + " ex: '<device1> <device2> <device3> ...' \n\n" + " <mode> : the name of the mode you want to pull. 
\n" + " ex: STABLE, ALL, START, BEAMDUMP ... \n\n" + " <start> : the start time of the pull you want to do. \n" + " format: 'yyyy-mm-dd hh:mm:ss.ms' or 'yyyy-mm-dd_hh:mm:ss.ms'") sys.exit(0) - elif len(sys.argv) < 4: - print("usage: python property-pull.py <device> <mode> <start> <end> \n" + elif len(sys.argv) < 6: + print("usage: python property-pull.py <fillName> <device> <mode> <start> <end> [-f] \n" + "where : \n" + " -f also generate .dat files \n" "type 'python property-pull.py -h' for more infos") sys.exit(1) else: - _device = sys.argv[1] - _mode = sys.argv[2] - _start = sys.argv[3] - _end = sys.argv[4] - - conf = SparkConf() - conf.setMaster('yarn') - conf.setAppName('property-pull.py') - - sc = SparkContext(conf=conf) - sqlContext = SQLContext(sc) - spark = SparkSession(sc) + _fill_name = sys.argv[1] + _device = sys.argv[2] + _mode = sys.argv[3] + _start = sys.argv[4] + _end = sys.argv[5] + if len(sys.argv) > 6 and sys.argv[6] == "-f": + _files = True print('Pulling data for device ' + _device + ' mode ' + _mode + ' from ' + _start + ' to ' + _end) gcursor = mydb.cursor() + gcursor.execute('SELECT id FROM device where name=%s', (_device,)) device_id = gcursor.fetchone()[0] # print(device_id) @@ -492,7 +509,15 @@ if __name__ == '__main__': gcursor.execute('INSERT INTO fill(name, device_id, mode, data_id, start, end)' 'VALUES (%s, %s, %s, %s, %s, %s )', - (_device + _start + 'to' + _end, device_id, _mode, data_id, _start, _end,)) + (_fill_name, device_id, _mode, data_id, _start, _end,)) + + conf = SparkConf() + conf.setMaster('yarn') + conf.setAppName('property-pull.py') + + sc = SparkContext(conf=conf) + sqlContext = SQLContext(sc) + spark = SparkSession(sc) pull_histogram(_start, _end, _device) pull_integral(_start, _end, _device)