diff --git a/.idea/other.xml b/.idea/other.xml
new file mode 100644
index 0000000000000000000000000000000000000000..640fd80b829a36c6ae5382d4aeeb511d00e869f3
--- /dev/null
+++ b/.idea/other.xml
@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="PySciProjectComponent">
+    <option name="PY_SCI_VIEW" value="true" />
+    <option name="PY_SCI_VIEW_SUGGESTED" value="true" />
+  </component>
+</project>
\ No newline at end of file
diff --git a/nxcals-hadoop-pro-config-0.1.164.jar b/nxcals-hadoop-pro-config-0.1.164.jar
deleted file mode 120000
index 7088c217b2ccca0726d942459ebc9f491a0d9529..0000000000000000000000000000000000000000
--- a/nxcals-hadoop-pro-config-0.1.164.jar
+++ /dev/null
@@ -1 +0,0 @@
-/home/dredd/Documents/CERN/spark-2.4.0-bin-hadoop2.7/jars/nxcals-hadoop-pro-config-0.1.164.jar
\ No newline at end of file
diff --git a/nxcals-hadoop-pro-config-0.1.164.jar b/nxcals-hadoop-pro-config-0.1.164.jar
new file mode 100644
index 0000000000000000000000000000000000000000..8c0f0935004ac9703afb2b09a3c0f5e1539a9695
Binary files /dev/null and b/nxcals-hadoop-pro-config-0.1.164.jar differ
diff --git a/nxcals-jars b/nxcals-jars
deleted file mode 120000
index b9ec078c5bf229f025de4d319c83f14543ae0111..0000000000000000000000000000000000000000
--- a/nxcals-jars
+++ /dev/null
@@ -1 +0,0 @@
-/home/dredd/Documents/CERN/spark-2.4.0-bin-hadoop2.7/nxcals-jars
\ No newline at end of file
diff --git a/property-pull.py b/property-pull.py
index 704ba7bee239529b553c000e4e35bd06a819c97e..91854113af73bd5ca1c5875a0d893e1a4880647d 100644
--- a/property-pull.py
+++ b/property-pull.py
@@ -40,11 +40,9 @@ sums_cols = udf(lambda arr: 0 if arr == [] else __builtins__.sum(arr), IntegerTy
 
 '''
 #TODO
-
-# Check Why Columns is not working
-# Check compression ?
-# Reduce ( UDA )
-# Scala UDF ? (working with python's lambda for now )
-# Write Brut hito data correctly => ['element'] not OK
+# Check compression ? Done => MB optimisation using numpy
+# Reduce ( UDAF ? )
+# Scala UDF ? (working with python's lambda for now )
+
 '''
 
@@ -68,12 +66,12 @@ def pull_histogram(_start_time, _end_time, _device):
 
     data = {}
 
     df = KeyValuesQuery.builder(spark).system("CMW") \
-    .startTime(_start_time) \
-    .endTime(_end_time) \
-    .entity() \
-    .keyValue("device", _device) \
-    .keyValue("property", _property) \
-    .buildDataset()
+        .startTime(_start_time) \
+        .endTime(_end_time) \
+        .entity() \
+        .keyValue("device", _device) \
+        .keyValue("property", _property) \
+        .buildDataset()
 
     df.printSchema()
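
Note on the `# Reduce ( UDAF ? )` and `# Scala UDF ?` TODO items: since Spark 2.4, the SQL higher-order function `aggregate` can fold an array column entirely inside the JVM, which would make the Python `sums_cols` UDF unnecessary. A minimal, self-contained sketch of both approaches; the DataFrame and the `hits` column name are hypothetical stand-ins, not taken from property-pull.py:

from pyspark.sql import SparkSession, functions as F
from pyspark.sql.types import ArrayType, IntegerType, StructField, StructType

spark = SparkSession.builder.master("local[1]").appName("sum-arrays").getOrCreate()

# Hypothetical stand-in for the pulled histogram data: one integer-array column.
schema = StructType([StructField("hits", ArrayType(IntegerType()))])
df = spark.createDataFrame([([1, 2, 3],), ([],)], schema)

# Current approach in property-pull.py: a Python UDF that sums each array.
sums_cols = F.udf(lambda arr: 0 if arr == [] else sum(arr), IntegerType())

# Possible UDF-free alternative (Spark >= 2.4): the built-in higher-order
# function `aggregate` folds the array without a Python round trip.
df.select(
    sums_cols("hits").alias("udf_sum"),
    F.expr("aggregate(hits, 0, (acc, x) -> acc + x)").alias("builtin_sum"),
).show()

Keeping the fold in Spark SQL avoids serializing every array out to a Python worker, which is the same overhead the open `# Scala UDF ?` item is aimed at.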
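Note on `# Check compression ? Done => MB optimisation using numpy`: the optimisation itself is not visible in this diff, but the general idea is that a numpy array with a narrow dtype stores histogram bins far more compactly than a Python list of int objects. A rough sketch under that assumption (names hypothetical):

import sys
import numpy as np

hist = list(range(100000))                 # histogram bins as a plain Python list
packed = np.asarray(hist, dtype=np.int32)  # same values in one contiguous int32 buffer

print(sys.getsizeof(hist))  # size of the list object itself (per-element pointers only)
print(packed.nbytes)        # 4 bytes per bin: 400000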